From 3a52c9ee7e0c25bae81055d56fc5be87c112e47b Mon Sep 17 00:00:00 2001 From: Seemann Date: Sat, 26 Aug 2023 12:39:19 -0400 Subject: [PATCH 001/216] fix error in 004E allowing to run multiple missions --- source/CScriptEngine.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 164f6ec1..b7b1da1b 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -220,7 +220,7 @@ namespace CLEO } else { - if (!pScript->IsMission()) *MissionLoaded = false; + if (pScript->IsMission()) *MissionLoaded = false; RemoveScriptFromQueue(pScript, activeThreadQueue); AddScriptToQueue(pScript, inactiveThreadQueue); StopScript(pScript); From 6ffd3ab5c33237e7db2a9ec56f383ee93d171d82 Mon Sep 17 00:00:00 2001 From: Seemann Date: Sat, 26 Aug 2023 15:54:37 -0400 Subject: [PATCH 002/216] reset variable before writing a value from file --- source/CCustomOpcodeSystem.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 411a6b1f..9210b3a5 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -1210,9 +1210,11 @@ namespace CLEO { { DWORD hFile; DWORD size; - void *buf; *thread >> hFile >> size; - buf = GetScriptParamPointer(thread); + + SCRIPT_VAR* buf = GetScriptParamPointer(thread); + buf->dwParam = 0; // https://github.com/cleolibrary/CLEO4/issues/91 + if (convert_handle_to_file(hFile)) read_file(buf, size, 1, hFile); return OR_CONTINUE; } From 79dfcb07f1dd0ad06f34e80be5aa33e96808cf91 Mon Sep 17 00:00:00 2001 From: Seemann Date: Fri, 1 Sep 2023 22:36:14 -0400 Subject: [PATCH 003/216] use readString SDK to get the first argument in 0AF5 --- demo_plugins/IniFiles/IniFiles/IniFiles.cpp | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/demo_plugins/IniFiles/IniFiles/IniFiles.cpp b/demo_plugins/IniFiles/IniFiles/IniFiles.cpp index 12100ddf..4fe883fa 100644 --- 
a/demo_plugins/IniFiles/IniFiles/IniFiles.cpp +++ b/demo_plugins/IniFiles/IniFiles/IniFiles.cpp @@ -212,18 +212,7 @@ class IniFiles { char *strptr; BOOL result; - switch (CLEO_GetOperandType(thread)) - { - case globalVarVString: - case localVarVString: - case globalVarSString: - case localVarSString: - CLEO_ReadStringPointerOpcodeParam(thread, strValue, sizeof(strValue)); - break; - default: - strptr = (char *)CLEO_GetIntOpcodeParam(thread); - strcpy(strValue, strptr); - } + CLEO_ReadStringPointerOpcodeParam(thread, strValue, sizeof(strValue)); CLEO_ReadStringPointerOpcodeParam(thread, path, sizeof(path)); CLEO_ReadStringPointerOpcodeParam(thread, sectionName, sizeof(sectionName)); CLEO_ReadStringPointerOpcodeParam(thread, key, sizeof(key)); From a286b86965cacb48c3a22f6314d976bfc952b9c3 Mon Sep 17 00:00:00 2001 From: Miran Date: Sat, 2 Sep 2023 05:22:14 +0200 Subject: [PATCH 004/216] Fixed text buffers size. --- source/CCustomOpcodeSystem.cpp | 2 +- source/CTextManager.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 9210b3a5..193390d2 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -234,7 +234,7 @@ namespace CLEO { } catch (const char * e) { - char str[128]; + char str[MAX_STR_LEN]; sprintf(str, "%s encountered while parsing opcode '%04X' in script '%s'", e, last_opcode, last_thread); Error(str); } diff --git a/source/CTextManager.cpp b/source/CTextManager.cpp index d6e8cdb1..245b4a74 100644 --- a/source/CTextManager.cpp +++ b/source/CTextManager.cpp @@ -20,9 +20,9 @@ namespace CLEO const char* (__fastcall * CText__Get)(CText*, int dummy, const char*); DWORD _CText__TKey__locate; - char message_buf_big[7][0x80]; - char message_buf_low[0x80]; - char message_buf_high[0x80]; + char message_buf_big[7][MAX_STR_LEN]; + char message_buf_low[MAX_STR_LEN]; + char message_buf_high[MAX_STR_LEN]; const char * __fastcall 
CText__TKey__locate(CText__TKey *key, int dummy, const char *gxt, bool& found) { From 491cd0b78d3792473b191bc28b3c9ac5a0d28b8f Mon Sep 17 00:00:00 2001 From: Miran Date: Sat, 2 Sep 2023 05:51:47 +0200 Subject: [PATCH 005/216] Check if 'arguments count' param is present before trying to read it. --- source/CCustomOpcodeSystem.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 193390d2..b29500bf 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -1632,9 +1632,10 @@ namespace CLEO { OpcodeResult __stdcall opcode_0AB1(CRunningScript *thread) { int label; - DWORD nParams; + *thread >> label; - *thread >> label >> nParams; + DWORD nParams = 0; + if(*thread->GetBytePointer()) *thread >> nParams; ScmFunction* scmFunc = new ScmFunction(thread); From 2fdb81c124686249f7e4e74f55934aa36414908f Mon Sep 17 00:00:00 2001 From: Miran Date: Sat, 2 Sep 2023 07:36:01 +0200 Subject: [PATCH 006/216] 0AB2 param count argument made optional. --- source/CCustomOpcodeSystem.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index b29500bf..b53935cc 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -1712,8 +1712,10 @@ namespace CLEO { OpcodeResult __stdcall opcode_0AB2(CRunningScript *thread) { ScmFunction *scmFunc = ScmFunction::Store[reinterpret_cast(thread)->GetScmFunction()]; + DWORD nRetParams; - *thread >> nRetParams; + if (*thread->GetBytePointer()) *thread >> nRetParams; + if (nRetParams) GetScriptParams(thread, nRetParams); scmFunc->Return(thread); if (nRetParams) SetScriptParams(thread, nRetParams); From 53550d96c982305b1b6f6252612a8be1ad2106f8 Mon Sep 17 00:00:00 2001 From: Miran Date: Sat, 2 Sep 2023 09:20:05 +0200 Subject: [PATCH 007/216] fixup! 0AB2 param count argument made optional. 
--- source/CCustomOpcodeSystem.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index b53935cc..1a111727 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -1713,7 +1713,7 @@ namespace CLEO { { ScmFunction *scmFunc = ScmFunction::Store[reinterpret_cast(thread)->GetScmFunction()]; - DWORD nRetParams; + DWORD nRetParams = 0; if (*thread->GetBytePointer()) *thread >> nRetParams; if (nRetParams) GetScriptParams(thread, nRetParams); From a3041e11279d0b39fb5282764b5758391f16bbb5 Mon Sep 17 00:00:00 2001 From: Miran Date: Sun, 3 Sep 2023 05:16:15 +0200 Subject: [PATCH 008/216] cleo_call and cleo_return scope now save and restore GOSUB's call stack. --- source/CCustomOpcodeSystem.cpp | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 1a111727..07d069bb 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -826,6 +826,8 @@ namespace CLEO { { unsigned short prevScmFunctionId, thisScmFunctionId; BYTE *retnAddress; + BYTE* savedStack[8]; // gosub stack + WORD savedSP; SCRIPT_VAR savedTls[32]; std::list stringParams; // texts with this scope lifetime bool savedCondResult; @@ -860,12 +862,19 @@ namespace CLEO { auto cs = reinterpret_cast(thread); // create snapshot of current scope + std::copy(std::begin(cs->Stack), std::end(cs->Stack), std::begin(savedStack)); + savedSP = cs->SP; + auto scope = cs->IsMission() ? 
missionLocals : cs->LocalVar; std::copy(scope, scope + 32, savedTls); + savedCondResult = cs->bCondResult; savedLogicalOp = cs->LogicalOp; savedNotFlag = cs->NotFlag; + // init new scope + std::fill(std::begin(cs->Stack), std::end(cs->Stack), nullptr); + cs->SP = 0; cs->bCondResult = false; cs->LogicalOp = eLogicalOperation::NONE; cs->NotFlag = false; @@ -875,8 +884,13 @@ namespace CLEO { void Return(CRunningScript *thread) { - // restore parent scope's local variables auto cs = reinterpret_cast(thread); + + // restore parent scope's gosub call stack + std::copy(std::begin(savedStack), std::end(savedStack), std::begin(cs->Stack)); + cs->SP = savedSP; + + // restore parent scope's local variables std::copy(savedTls, savedTls + 32, cs->IsMission() ? missionLocals : cs->LocalVar); // process conditional result of just ended function in parent scope From 03bdb179364303b174ae4ce42355c305f3943fd0 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Tue, 19 Sep 2023 20:55:18 +0200 Subject: [PATCH 009/216] Update project configuration (#102) Fixed SDK includes. Configured post build events and debug settings. 
Added setup_env.bat and SETUP.md --- CLEO4.sln | 12 +++++----- CLEO4.vcxproj | 62 +++++++++++++++++++++++++++++---------------------- SETUP.md | 7 ++++++ setup_env.bat | 55 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 103 insertions(+), 33 deletions(-) create mode 100644 SETUP.md create mode 100644 setup_env.bat diff --git a/CLEO4.sln b/CLEO4.sln index 7b85ecbb..fc5c5f87 100644 --- a/CLEO4.sln +++ b/CLEO4.sln @@ -1,7 +1,7 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 15 -VisualStudioVersion = 15.0.27004.2002 +# Visual Studio Version 17 +VisualStudioVersion = 17.4.33213.308 MinimumVisualStudioVersion = 10.0.40219.1 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CLEO4", "CLEO4.vcxproj", "{B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}" EndProject @@ -11,10 +11,10 @@ Global Release|GTASA = Release|GTASA EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution - {B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}.Debug|GTASA.ActiveCfg = Debug GTASA|Win32 - {B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}.Debug|GTASA.Build.0 = Debug GTASA|Win32 - {B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}.Release|GTASA.ActiveCfg = Release GTASA|Win32 - {B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}.Release|GTASA.Build.0 = Release GTASA|Win32 + {B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}.Debug|GTASA.ActiveCfg = Debug|Win32 + {B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}.Debug|GTASA.Build.0 = Debug|Win32 + {B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}.Release|GTASA.ActiveCfg = Release|Win32 + {B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}.Release|GTASA.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/CLEO4.vcxproj b/CLEO4.vcxproj index 343d34f5..22cae2d4 100644 --- a/CLEO4.vcxproj +++ b/CLEO4.vcxproj @@ -1,12 +1,12 @@  - - Release GTASA + + Release Win32 - - Debug GTASA + + Debug Win32 @@ -26,8 +26,8 @@ - Create - Create + Create + Create @@ -62,46 +62,47 @@ true Win32Proj CLEO4 - 10.0.18362.0 + 10.0 - + 
DynamicLibrary false MultiByte - v142 + v143 true - + DynamicLibrary true MultiByte - v142 + v143 - + - + - - $(SolutionDir)output\Release\ - $(SolutionDir)output\.obj\Release\ + + $(SolutionDir)output\$(Configuration)\ + $(SolutionDir)output\.obj\$(Configuration)\ CLEO .asi + $(PLUGIN_SDK_DIR)\shared\;$(PLUGIN_SDK_DIR)\shared\game\;$(IncludePath) - - $(SolutionDir)output\Debug\ - $(SolutionDir)output\.obj\Debug\ + + $(SolutionDir)output\$(Configuration)\ + $(SolutionDir)output\.obj\$(Configuration)\ CLEO .asi - $(VC_IncludePath);$(WindowsSdk_71A_IncludePath);E:\Documents\Para GTA\plugin-sdk-master\shared + $(PLUGIN_SDK_DIR)\shared\;$(PLUGIN_SDK_DIR)\shared\game\;$(IncludePath) - + Level3 MaxSpeed @@ -113,11 +114,12 @@ _NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";CLEO4_EXPORTS;%(PreprocessorDefinitions) /Zc:threadSafeInit- %(AdditionalOptions) Create + stdcpp17 true true - No + true UseLinkTimeCodeGeneration $(SolutionDir)\third-party\bass;%(AdditionalLibraryDirectories) bass.lib;%(AdditionalDependencies) @@ -126,10 +128,13 @@ false - xcopy /Y "$(SolutionDir)output\Release\CLEO.lib" "$(SolutionDir)cleo_sdk\" + xcopy /Y "$(OutDir)$(TargetName).lib" "$(SolutionDir)cleo_sdk\" +taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" +xcopy /Y "$(OutDir)$(TargetName).asi" "$(GTA_SA_DIR)\" +xcopy /Y "$(OutDir)$(TargetName).pdb" "$(GTA_SA_DIR)\" - + Level3 Disabled @@ -139,9 +144,10 @@ _DEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;_SCL_SECURE_NO_WARNINGS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";CLEO4_EXPORTS;%(PreprocessorDefinitions); /Zc:threadSafeInit- %(AdditionalOptions) Create + stdcpp17 - Debug + true Default $(SolutionDir)\third-party\bass;%(AdditionalLibraryDirectories) 
bass.lib;%(AdditionalDependencies) @@ -150,8 +156,10 @@ false - - + xcopy /Y "$(OutDir)$(TargetName).lib" "$(SolutionDir)cleo_sdk\" +taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" +xcopy /Y "$(OutDir)$(TargetName).asi" "$(GTA_SA_DIR)\" +xcopy /Y "$(OutDir)$(TargetName).pdb" "$(GTA_SA_DIR)\" diff --git a/SETUP.md b/SETUP.md new file mode 100644 index 00000000..f03cd252 --- /dev/null +++ b/SETUP.md @@ -0,0 +1,7 @@ +# CLEO project configuration + +This project depends on Plugin SDK (https://github.com/DK22Pac/plugin-sdk). Using the SDK's installer creates the PLUGIN_SDK_DIR environment variable in the operating system. If the installer is not used, please manually enter the path to the SDK directory in setup_env.bat. +If GTA SA is installed in a location other than the default, please open setup_env.bat and configure the correct path. +Run setup_env.bat to set up the required environment variables. + +After opening the project solution in Visual Studio, it should be possible to build CLEO as well as debug it in game. diff --git a/setup_env.bat b/setup_env.bat new file mode 100644 index 00000000..9730238c --- /dev/null +++ b/setup_env.bat @@ -0,0 +1,55 @@ +@ECHO OFF + +@SET GAME_DIR=C:\Program Files (x86)\Rockstar Games\GTA San Andreas +@SET PLUGIN_DIR=C:\plugin-sdk-master + + + + + + + + + + + +@IF DEFINED GTA_SA_DIR ( + ECHO GTA_SA_DIR already set to: + ECHO "%GTA_SA_DIR%" + ECHO: + ECHO New value: + ECHO "%GAME_DIR%" + ECHO: + CHOICE /C YN /M "Do you want to update?" + If ERRORLEVEL 2 GOTO SET_GAME_END +) + +@SETX GTA_SA_DIR "%GAME_DIR%" +ECHO GTA_SA_DIR configured as: +ECHO "%GAME_DIR%" +:SET_GAME_END + +ECHO: +ECHO: + +@IF DEFINED PLUGIN_SDK_DIR ( + ECHO PLUGIN_SDK_DIR already set to: + ECHO "%PLUGIN_SDK_DIR%" + ECHO . + ECHO New value: + ECHO "%PLUGIN_DIR%" + ECHO . + CHOICE /C YN /M "Do you want to update?" 
+ If ERRORLEVEL 2 GOTO SET_PLUGIN_END +) + +@SETX PLUGIN_SDK_DIR "%PLUGIN_DIR%" +ECHO PLUGIN_SDK_DIR configured as: +ECHO "%PLUGIN_DIR%" +:SET_PLUGIN_END + +ECHO: +ECHO: + +pause +exit From efe00ef49945a85012cc2938c27ff82cccea5866 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Tue, 19 Sep 2023 20:56:56 +0200 Subject: [PATCH 010/216] Add opcode 0DD5 (get_platform) (#100) --- CHANGELOG.md | 4 ++++ README.md | 2 +- source/CCustomOpcodeSystem.cpp | 32 +++++++++++--------------------- source/CCustomOpcodeSystem.h | 25 +++++++++++++++++++++++++ source/CGameVersionManager.h | 15 +++++++++++++++ 5 files changed, 56 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 45d899ec..e0317dfe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 4.4.5 + +- Added opcode 0DD5 (get_platform). + ## 4.4.4 - added string arguments support to 0AB1 (cleo_call) diff --git a/README.md b/README.md index a0d02c5d..daf6fe01 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# CLEO Library for GTA San Andreas +# CLEO Library for GTA San Andreas (Windows PC) CLEO is a hugely popular extensible library plugin which brings new possibilities in scripting for the game Grand Theft Auto: San Andreas by Rockstar Games, allowing the use of thousands of unique mods which change or expand the gameplay. 
You may find more information about CLEO on the official website https://cleo.li diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 07d069bb..855acfbb 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -122,6 +122,8 @@ namespace CLEO { OpcodeResult __stdcall opcode_0AEE(CRunningScript *thread); OpcodeResult __stdcall opcode_0AEF(CRunningScript *thread); + OpcodeResult __stdcall opcode_0DD5(CRunningScript* thread); // get_platform + CustomOpcodeHandler customOpcodeHandlers[100] = { opcode_0A8C, opcode_0A8D, opcode_0A8E, opcode_0A8F, opcode_0A90, @@ -262,6 +264,8 @@ namespace CLEO { // fill the rest with default handler std::fill(newOpcodeHandlerTable + 28, newOpcodeHandlerTable + 329, reinterpret_cast<_OpcodeHandler>(extraOpcodeHandler)); + CLEO_RegisterOpcode(0x0DD5, opcode_0DD5); // get_platform + FUNC_fopen = gvm.TranslateMemoryAddress(MA_FOPEN_FUNCTION); FUNC_fclose = gvm.TranslateMemoryAddress(MA_FCLOSE_FUNCTION); FUNC_fread = gvm.TranslateMemoryAddress(MA_FREAD_FUNCTION); @@ -2645,6 +2649,13 @@ namespace CLEO { *thread << (float)(log(arg) / log(base)); return OR_CONTINUE; } + + //0DD5=1,%1d% = get_platform + OpcodeResult __stdcall opcode_0DD5(CRunningScript* thread) + { + *thread << PLATFORM_WINDOWS; + return OR_CONTINUE; + } } @@ -2655,27 +2666,6 @@ namespace CLEO { extern "C" { using namespace CLEO; - - // Define external symbols with MSVC decorating schemes - BOOL WINAPI CLEO_RegisterOpcode(WORD opcode, CustomOpcodeHandler callback); - DWORD WINAPI CLEO_GetIntOpcodeParam(CRunningScript* thread); - float WINAPI CLEO_GetFloatOpcodeParam(CRunningScript* thread); - void WINAPI CLEO_SetIntOpcodeParam(CRunningScript* thread, DWORD value); - void WINAPI CLEO_SetFloatOpcodeParam(CRunningScript* thread, float value); - LPSTR WINAPI CLEO_ReadStringOpcodeParam(CRunningScript* thread, char *buf, int size); - LPSTR WINAPI CLEO_ReadStringPointerOpcodeParam(CRunningScript* thread, char *buf, int size); - 
void WINAPI CLEO_WriteStringOpcodeParam(CRunningScript* thread, LPCSTR str); - void WINAPI CLEO_SetThreadCondResult(CRunningScript* thread, BOOL result); - void WINAPI CLEO_SkipOpcodeParams(CRunningScript* thread, int count); - void WINAPI CLEO_ThreadJumpAtLabelPtr(CRunningScript* thread, int labelPtr); - int WINAPI CLEO_GetOperandType(CRunningScript* thread); - void WINAPI CLEO_RetrieveOpcodeParams(CRunningScript *thread, int count); - void WINAPI CLEO_RecordOpcodeParams(CRunningScript *thread, int count); - SCRIPT_VAR * WINAPI CLEO_GetPointerToScriptVariable(CRunningScript* thread); - RwTexture * WINAPI CLEO_GetScriptTextureById(CRunningScript* thread, int id); - HSTREAM WINAPI CLEO_GetInternalAudioStream(CRunningScript* thread, CAudioStream *stream); - CRunningScript* WINAPI CLEO_CreateCustomScript(CRunningScript* fromThread, const char *fileName, int label); - #ifdef _MSC_VER #pragma warning(push) #pragma warning(disable:4550) diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index 26f68d42..31b6ffd5 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -1,6 +1,7 @@ #pragma once #include "CCodeInjector.h" #include "CDebug.h" +#include "CSoundSystem.h" #include #include @@ -75,4 +76,28 @@ namespace CLEO }; extern void(__thiscall * ProcessScript)(CRunningScript*); + + // Exports + extern "C" + { + // Define external symbols with MSVC decorating schemes + BOOL WINAPI CLEO_RegisterOpcode(WORD opcode, CustomOpcodeHandler callback); + DWORD WINAPI CLEO_GetIntOpcodeParam(CRunningScript* thread); + float WINAPI CLEO_GetFloatOpcodeParam(CRunningScript* thread); + void WINAPI CLEO_SetIntOpcodeParam(CRunningScript* thread, DWORD value); + void WINAPI CLEO_SetFloatOpcodeParam(CRunningScript* thread, float value); + LPSTR WINAPI CLEO_ReadStringOpcodeParam(CRunningScript* thread, char* buf, int size); + LPSTR WINAPI CLEO_ReadStringPointerOpcodeParam(CRunningScript* thread, char* buf, int size); + void WINAPI 
CLEO_WriteStringOpcodeParam(CRunningScript* thread, LPCSTR str); + void WINAPI CLEO_SetThreadCondResult(CRunningScript* thread, BOOL result); + void WINAPI CLEO_SkipOpcodeParams(CRunningScript* thread, int count); + void WINAPI CLEO_ThreadJumpAtLabelPtr(CRunningScript* thread, int labelPtr); + int WINAPI CLEO_GetOperandType(CRunningScript* thread); + void WINAPI CLEO_RetrieveOpcodeParams(CRunningScript* thread, int count); + void WINAPI CLEO_RecordOpcodeParams(CRunningScript* thread, int count); + SCRIPT_VAR* WINAPI CLEO_GetPointerToScriptVariable(CRunningScript* thread); + RwTexture* WINAPI CLEO_GetScriptTextureById(CRunningScript* thread, int id); + HSTREAM WINAPI CLEO_GetInternalAudioStream(CRunningScript* thread, CAudioStream* stream); + CRunningScript* WINAPI CLEO_CreateCustomScript(CRunningScript* fromThread, const char* fileName, int label); + } } diff --git a/source/CGameVersionManager.h b/source/CGameVersionManager.h index a012a56a..eb47d56f 100644 --- a/source/CGameVersionManager.h +++ b/source/CGameVersionManager.h @@ -15,6 +15,21 @@ namespace CLEO GV_UNK = -1 }; + // returned by 0DD5: get_platform opcode + enum ePlatform + { + PLATFORM_NONE, + PLATFORM_ANDROID, + PLATFORM_PSP, + PLATFORM_IOS, + PLATFORM_FOS, + PLATFORM_XBOX, + PLATFORM_PS2, + PLATFORM_PS3, + PLATFORM_MAC, + PLATFORM_WINDOWS + }; + // determines the list of memory adresses, that can be translated // considering to game version enum eMemoryAddress From 98a4911413012e2511e2707f3c4d0b1a64261355 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sat, 30 Sep 2023 02:17:48 +0200 Subject: [PATCH 011/216] Introducing cleo modules (#101) * cleo_call and cleo_return scope now save and restore GOSUB call stack and stack pointer. * Introduced CModuleSystem class. * Working prototype. * Support of path resolving for modules. * Path normalization updated. * Review fixes. * Disabled virtual absolute paths feature. * Fixed script location and name related functions. * Used game's APIs to resolve paths. 
Figured out using plugin_sdk classes in project. * Names case insensitive handling. * Modules reloading. * Automatic modules reloading. --- CLEO4.vcxproj | 21 ++ CLEO4.vcxproj.filters | 30 +++ source/CCustomOpcodeSystem.cpp | 93 ++++++- source/CCustomOpcodeSystem.h | 4 + source/CDebug.h | 5 + source/CModuleSystem.cpp | 444 +++++++++++++++++++++++++++++++++ source/CModuleSystem.h | 81 ++++++ source/CScriptEngine.cpp | 48 ++++ source/CScriptEngine.h | 60 +++-- source/CTheScripts.cpp | 54 ++++ source/CTheScripts.h | 10 + source/cleo.cpp | 66 +++++ source/cleo.h | 28 ++- 13 files changed, 904 insertions(+), 40 deletions(-) create mode 100644 source/CModuleSystem.cpp create mode 100644 source/CModuleSystem.h create mode 100644 source/CTheScripts.cpp diff --git a/CLEO4.vcxproj b/CLEO4.vcxproj index 22cae2d4..1cbb1ccb 100644 --- a/CLEO4.vcxproj +++ b/CLEO4.vcxproj @@ -12,6 +12,24 @@ + + NotUsing + + + NotUsing + + + NotUsing + + + NotUsing + + + NotUsing + + + NotUsing + @@ -19,10 +37,12 @@ + + @@ -40,6 +60,7 @@ + diff --git a/CLEO4.vcxproj.filters b/CLEO4.vcxproj.filters index 35b0037e..7aa839a9 100644 --- a/CLEO4.vcxproj.filters +++ b/CLEO4.vcxproj.filters @@ -7,6 +7,9 @@ {d188d452-fbc6-48b5-bd49-d4036c989109} + + {5cead5cc-9a75-4d2e-99b5-ebbc8f9d6d86} + @@ -54,6 +57,30 @@ source + + source + + + source + + + plugin_sdk + + + plugin_sdk + + + plugin_sdk + + + plugin_sdk + + + plugin_sdk + + + plugin_sdk + @@ -113,6 +140,9 @@ source + + source + diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 855acfbb..e135a5b3 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -6,6 +6,8 @@ #include "CTextManager.h" #include "CModelInfo.h" +#include + namespace CLEO { DWORD FUNC_fopen; DWORD FUNC_fclose; @@ -829,6 +831,7 @@ namespace CLEO { struct ScmFunction { unsigned short prevScmFunctionId, thisScmFunctionId; + void* savedBaseIP; BYTE *retnAddress; BYTE* savedStack[8]; // gosub stack WORD savedSP; @@ -839,7 +842,9 @@ 
namespace CLEO { bool savedNotFlag; static const size_t store_size = 0x400; static ScmFunction *Store[store_size]; - static size_t allocationPlace; // contains an index of last allocated object + static size_t allocationPlace; // contains an index of last allocated object + std::string savedScriptFileDir; // modules switching + std::string savedScriptFileName; // modules switching void *operator new(size_t size) { @@ -866,6 +871,7 @@ namespace CLEO { auto cs = reinterpret_cast(thread); // create snapshot of current scope + savedBaseIP = cs->BaseIP; std::copy(std::begin(cs->Stack), std::end(cs->Stack), std::begin(savedStack)); savedSP = cs->SP; @@ -876,6 +882,9 @@ namespace CLEO { savedLogicalOp = cs->LogicalOp; savedNotFlag = cs->NotFlag; + savedScriptFileDir = thread->GetScriptFileDir(); + savedScriptFileName = thread->GetScriptFileName(); + // init new scope std::fill(std::begin(cs->Stack), std::end(cs->Stack), nullptr); cs->SP = 0; @@ -891,6 +900,7 @@ namespace CLEO { auto cs = reinterpret_cast(thread); // restore parent scope's gosub call stack + cs->BaseIP = savedBaseIP; std::copy(std::begin(savedStack), std::end(savedStack), std::begin(cs->Stack)); cs->SP = savedSP; @@ -917,6 +927,9 @@ namespace CLEO { cs->LogicalOp = savedLogicalOp; } + thread->SetScriptFileDir(savedScriptFileDir.c_str()); + thread->SetScriptFileName(savedScriptFileName.c_str()); + cs->SetIp(retnAddress); cs->SetScmFunction(prevScmFunctionId); } @@ -1649,8 +1662,77 @@ namespace CLEO { //0AB1=-1,call_scm_func %1p% OpcodeResult __stdcall opcode_0AB1(CRunningScript *thread) { - int label; - *thread >> label; + BYTE* base = nullptr; + int label = 0; + + char* moduleTxt = nullptr; + switch (*thread->GetBytePointer()) + { + // label of current script + case DT_DWORD: + case DT_WORD: + case DT_BYTE: + case DT_VAR: + case DT_LVAR: + case DT_VAR_ARRAY: + case DT_LVAR_ARRAY: + base = thread->GetBasePointer(); // current script + *thread >> label; + break; + + // string with module and export name + 
case DT_VAR_STRING: + case DT_LVAR_STRING: + case DT_VAR_TEXTLABEL: + case DT_LVAR_TEXTLABEL: + moduleTxt = GetScriptParamPointer(thread)->pcParam; + break; + + case DT_STRING: + case DT_TEXTLABEL: + case DT_VARLEN_STRING: + moduleTxt = readString(thread); + break; + + default: + { + std::string err(128, '\0'); + sprintf(err.data(), "Invalid first argument type (%02X) of 0AB1 opcode in script '%s'", *thread->GetBytePointer(), thread->GetScriptFileName()); + Error(err.data()); + return OR_INTERRUPT; + } + } + + // parse module reference text + if (moduleTxt != nullptr) + { + std::string str(moduleTxt); + auto pos = str.find('@'); + if (pos == str.npos) + { + std::string err(128, '\0'); + sprintf(err.data(), "Invalid module reference '%s' in 0AB1 opcode in script '%s'", str.c_str(), thread->GetScriptFileName()); + Error(err.data()); + return OR_INTERRUPT; + } + str[pos] = '\0'; // split into two texts + + // get module's absolute path + std::string modulePath(&str[pos + 1]); + modulePath = ResolvePath(modulePath.c_str(), thread->GetScriptFileDir()); + + auto scriptRef = GetInstance().ModuleSystem.GetExport(modulePath.c_str(), &str[0]); + if (!scriptRef.Valid()) + { + std::string err(128, '\0'); + sprintf(err.data(), "Not found module '%s' export '%s', requested by 0AB1 opcode in script '%s'", modulePath.c_str(), &str[0], thread->GetScriptFileName()); + Error(err.data()); + return OR_INTERRUPT; + } + + base = (BYTE*)scriptRef.base; + label = scriptRef.offset; + } DWORD nParams = 0; if(*thread->GetBytePointer()) *thread >> nParams; @@ -1722,13 +1804,16 @@ namespace CLEO { } // jump to label - ThreadJump(thread, label); + thread->SetBaseIp(base); // script space + ThreadJump(thread, label); // script offset return OR_CONTINUE; } //0AB2=-1,ret OpcodeResult __stdcall opcode_0AB2(CRunningScript *thread) { + GetInstance().ModuleSystem.ReleaseModuleRef((char*)thread->GetBasePointer()); // release module if one used + ScmFunction *scmFunc = 
ScmFunction::Store[reinterpret_cast(thread)->GetScmFunction()]; DWORD nRetParams = 0; diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index 31b6ffd5..362d8878 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -19,6 +19,10 @@ namespace CLEO bool is_legacy_handle(DWORD dwHandle); FILE * convert_handle_to_file(DWORD dwHandle); + extern const char* (__cdecl* GetUserDirectory)(); + extern void(__cdecl* ChangeToUserDir)(); + extern void(__cdecl* ChangeToProgramDir)(const char*); + class CCustomOpcodeSystem : public VInjectible { friend OpcodeResult __stdcall opcode_0A9A(CRunningScript *pScript); diff --git a/source/CDebug.h b/source/CDebug.h index d396a793..5084cb35 100644 --- a/source/CDebug.h +++ b/source/CDebug.h @@ -1,4 +1,5 @@ #pragma once +#include #define TRACE __noop @@ -11,6 +12,8 @@ const char szLogFileName[] = "cleo.log"; class CDebug { + std::mutex mutex; + #ifdef DEBUGIT std::ofstream m_hFile; #endif @@ -30,6 +33,8 @@ class CDebug void Trace(const char *format, ...) { + std::lock_guard guard(mutex); + SYSTEMTIME t; static char szBuf[1024]; diff --git a/source/CModuleSystem.cpp b/source/CModuleSystem.cpp new file mode 100644 index 00000000..284f4f59 --- /dev/null +++ b/source/CModuleSystem.cpp @@ -0,0 +1,444 @@ +#include "stdafx.h" +#include "cleo.h" +#include "CModuleSystem.h" + +#include +#include +#include + +using namespace CLEO; + +void CModuleSystem::Clear() +{ + modules.clear(); +} + +const ScriptDataRef CModuleSystem::GetExport(const char* moduleName, const char* exportName) +{ + std::string path(moduleName); + NormalizePath(path); + + auto& it = modules.find(path); + if (it == modules.end()) // module not loaded yet? 
+ { + if (!LoadFile(path.c_str())) + { + return {}; + } + + // check if available now + it = modules.find(path); + if (it == modules.end()) + { + return {}; + } + } + auto& module = it->second; + + auto e = module.GetExport(exportName); + if (e.Valid()) + { + module.refCount++; + } + return e; +} + +bool CModuleSystem::LoadFile(const char* path) +{ + std::string normalizedPath(path); + NormalizePath(normalizedPath); + + if (!modules[normalizedPath].LoadFromFile(normalizedPath.c_str())) + { + return false; + } + + return true; +} + +bool CModuleSystem::LoadDirectory(const char* path) +{ + bool result = true; + + auto p = CLEO::ResolvePath(path); // actual absolute path + try + { + for (auto& it : std::filesystem::recursive_directory_iterator(p)) + { + auto& filePath = it.path(); + if (filePath.extension() == ".s") + { + result &= LoadFile(filePath.string().c_str()); + } + } + } + catch (const std::exception& ex) + { + TRACE("Error while iterating CLEO Modules: %s", ex.what()); + return false; + } + + return result; +} + +bool CModuleSystem::LoadCleoModules() +{ + return LoadDirectory("3:\\"); // cleo\cleo_modules +} + +void CLEO::CModuleSystem::AddModuleRef(const char* baseIP) +{ + for (auto& it : modules) + { + auto& module = it.second; + + if (module.data.data() == baseIP) + { + module.refCount++; + return; + } + } +} + +void CLEO::CModuleSystem::ReleaseModuleRef(const char* baseIP) +{ + for (auto& it : modules) + { + auto& module = it.second; + + if (module.data.data() == baseIP) + { + module.refCount--; + return; + } + } +} + +void CModuleSystem::NormalizePath(std::string& path) +{ + for (char& c : path) + { + // standarize path separators + if (c == '/') + c = '\\'; + + // lower case + c = std::tolower(c); + }; +} + +void CModuleSystem::CModule::Update() +{ + while (updateActive) + { + if (!updateNeeded) + { + std::filesystem::file_time_type time; + try + { + time = std::filesystem::last_write_time(filepath); + } + catch (...) 
+ { + time = {}; + } + + // file not exists or up to date + if (time == std::filesystem::file_time_type{} || time == fileTime) + { + // query files once a second + for(size_t i = 0; i < 100 && updateActive; i++) + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + + continue; + } + + updateNeeded = true; + } + + if (refCount != 0) + { + continue; // module currently in use + } + + auto file = filepath; + auto result = LoadFromFile(file.c_str()); + updateNeeded = false; + TRACE("Module reload %s '%s'", result ? "OK" : "FAILED", file.c_str()); + } +} + +CModuleSystem::CModule::CModule() : + updateThread(&CModuleSystem::CModule::Update, this) +{ +} + +CModuleSystem::CModule::~CModule() +{ + updateActive = false; + updateThread.join(); +} + +void CModuleSystem::CModule::Clear() +{ + if (refCount != 0) + { + TRACE("Warning! Module '%s' cleared despite in use %d time(s)", filepath.c_str(), refCount.load()); + } + + std::lock_guard guard(updateMutex); + + filepath.clear(); + data.clear(); + exports.clear(); + + refCount = 0; + fileTime = {}; +} + +const char* CModuleSystem::CModule::GetFilepath() const +{ + return filepath.c_str(); +} + +bool CModuleSystem::CModule::LoadFromFile(const char* path) +{ + Clear(); + + std::lock_guard guard(updateMutex); + + filepath = path; + + try + { + fileTime = std::filesystem::last_write_time(path); + } + catch(...) 
+ { + fileTime = {}; + } + + std::ifstream file(path, std::ios::binary); + if (!file.good()) + { + TRACE("Failed to open module file '%s'", path); + return false; + } + +#pragma warning ( push ) +#pragma warning ( disable: 4838 ) +#pragma warning ( disable: 4309 ) + const char Segment_First_Instruction[] = { 0x02, 0x00, 0x01 }; // jump, param type + const char Segment_Magic[] = { 0xFF, 0x7F, 0xFE, 0x00, 0x00 }; // Rockstar custom header magic + const char Header_Signature_Module_Exports[] = { 'E', 'X', 'P', 'T' }; // CLEO's module header signature +#pragma warning ( pop ) + + // read first instruction +#pragma pack(push, 1) + struct + { + char firstInstruction[3]; + int jumpAddress; + char magic[5]; + } segment; +#pragma pack(pop) + + file.read((char*)&segment, sizeof(segment)); + if (file.fail()) + { + TRACE("Module '%s' file header read error", path); + return false; + } + + // verify segment data + if (std::memcmp(segment.firstInstruction, Segment_First_Instruction, sizeof(Segment_First_Instruction)) != 0 || + segment.jumpAddress >= 0 || // jump labels should be negative values + std::memcmp(segment.magic, Segment_Magic, sizeof(Segment_Magic)) != 0) // not a custom header + { + TRACE("Module '%s' load error. Custom segment not present", path); + return false; + } + segment.jumpAddress = abs(segment.jumpAddress); // turn label into actual file offset + + // process custom headers +#pragma pack(push, 1) + struct + { + char signature[4]; + int size; + } header; +#pragma pack(pop) + + bool result = false; // no custom header found yet + while (file.tellg() < segment.jumpAddress) + { + file.read((char*)&header, sizeof(header)); + if (file.fail() || + file.tellg() > segment.jumpAddress) // read past the segment end + { + TRACE("Module '%s' load error. 
Invalid custom header", path); + return false; + } + + auto headerEndPos = file.tellg(); + headerEndPos += header.size; + + // CLEO Module Exports + if (std::memcmp(header.signature, Header_Signature_Module_Exports, sizeof(Header_Signature_Module_Exports)) == 0) + { + if (headerEndPos > segment.jumpAddress) + { + TRACE("Module '%s' load error. Invalid size of exports header", path); + return false; + } + + while (true) + { + ModuleExport e; + + if (!e.LoadFromFile(file) || + !file.good() || + file.tellg() > headerEndPos) + { + if (e.name.empty()) + { + TRACE("Module '%s' export load error.", path); + } + else + { + TRACE("Module's '%s' export '%s' load error.", path, e.name.c_str()); + } + return false; + } + + exports[e.name] = std::move(e); // move to container + result = true; // something useful loaded + + if (file.tellg() == headerEndPos) + { + break; // all exports done + } + } + } + else // other unknown header type + { + file.seekg(headerEndPos, file.beg); + if (file.fail()) + { + TRACE("Module '%s' load error. Error while skipping unknown header type", path); + return false; + } + } + } + + if (!file.good()) + { + TRACE("Module '%s' read error", path); + return false; + } + + if (!result) // no usable elements found. No point to keeping this module + { + TRACE("Module '%s' skipped. 
Nothing found", path); + return false; + } + + // get file size + file.seekg(0, file.end); + auto size = (size_t)file.tellg(); + file.seekg(0, file.beg); + + // store file data + data.resize(size); + file.read(data.data(), size); + if (file.fail()) + { + return false; + } + + return true; +} + +const ScriptDataRef CModuleSystem::CModule::GetExport(const char* name) +{ + auto normalized = std::string(name); + ModuleExport::NormalizeName(normalized); + + auto& it = exports.find(normalized); + if (it == exports.end()) + { + return {}; + } + auto& exp = it->second; + + return { data.data(), exp.offset }; +} + +void CModuleSystem::CModule::ModuleExport::Clear() +{ + name.clear(); + offset = 0; +} + +bool CModuleSystem::CModule::ModuleExport::LoadFromFile(std::ifstream& file) +{ + if (!file.good()) + { + return false; + } + + // name + std::getline(file, name, '\0'); + if (file.fail() || name.length() >= 0xFF) + { + return false; + } + NormalizeName(name); + + // address + file.read((char*)&offset, 4); + if (file.fail()) + { + return false; + } + + // input arg count + unsigned char inParamCount; + file.read((char*)&inParamCount, 1); + if (file.fail()) + { + return false; + } + + // skip input argument types info + file.seekg(inParamCount, file.cur); + if (file.fail()) + { + return false; + } + + // return value count + unsigned char outParamCount; + file.read((char*)&outParamCount, 1); + if (file.fail()) + { + return false; + } + + // skip return value types info + file.seekg(outParamCount, file.cur); + if (file.fail()) + { + return false; + } + + return true; // done +} + +void CModuleSystem::CModule::ModuleExport::NormalizeName(std::string& name) +{ + for (auto& ch : name) + { + ch = std::tolower(ch); + } +} diff --git a/source/CModuleSystem.h b/source/CModuleSystem.h new file mode 100644 index 00000000..286bb4c6 --- /dev/null +++ b/source/CModuleSystem.h @@ -0,0 +1,81 @@ +#pragma once +#include +#include +#include +#include +#include + +namespace CLEO +{ + struct 
ScriptDataRef + { + char* base = nullptr; // script's base data + int offset = 0; // address within the script + + bool Valid() const + { + return base != nullptr; + } + }; + + class CModuleSystem + { + public: + void Clear(); + + // registers module reference. Needs to be released with ReleaseModuleRef + const ScriptDataRef GetExport(const char* moduleName, const char* exportName); + + bool LoadFile(const char* const path); // single file + bool LoadDirectory(const char* const path); // all modules in directory + bool LoadCleoModules(); // all in cleo\cleo_modules + + // marking modules usage + void AddModuleRef(const char* baseIP); + void ReleaseModuleRef(const char* baseIP); + + private: + static void NormalizePath(std::string& path); + + class CModule + { + friend class CModuleSystem; + + struct ModuleExport + { + std::string name; + int offset = 0; // address within module's data + + void Clear(); + bool LoadFromFile(std::ifstream& file); + + static void NormalizeName(std::string& name); + }; + + std::string filepath; // source file + std::vector data; + std::map exports; + + // hot reloading when source file modified + std::atomic refCount = 0; + std::filesystem::file_time_type fileTime; // last write time of source file + void Update(); + std::atomic updateActive = true; + std::atomic updateNeeded = false; + std::mutex updateMutex; + std::thread updateThread; + + public: + CModule(); + ~CModule(); + + void Clear(); + const char* GetFilepath() const; + bool LoadFromFile(const char* path); + const ScriptDataRef GetExport(const char* name); + }; + + std::map modules; + }; +} + diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index b7b1da1b..de5e8d44 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -1,5 +1,9 @@ #include "stdafx.h" #include "cleo.h" +#include "CFileMgr.h" +#include "CGame.h" + +#include namespace CLEO { @@ -180,6 +184,10 @@ namespace CLEO GetInstance().TextManager.ClearDynamicFxts(); 
GetInstance().OpcodeSystem.FinalizeScriptObjects(); GetInstance().SoundSystem.UnloadAllStreams(); + + GetInstance().ScriptEngine.Initialize(); + GetInstance().ModuleSystem.Clear(); + //GetInstance().ModuleSystem.LoadCleoModules(); // TODO: enable if cleo_modules approved GetInstance().ScriptEngine.LoadCustomScripts(false); } @@ -280,6 +288,7 @@ namespace CLEO gangWeapons[7].weapon1 = 22; gangWeapons[7].weapon2 = 28; gangWeapons[7].weapon3 = 0; + GetInstance().TextManager.ClearDynamicFxts(); GetInstance().OpcodeSystem.FinalizeScriptObjects(); GetInstance().ScriptEngine.RemoveAllCustomScripts(); @@ -662,12 +671,44 @@ namespace CLEO inj.InjectFunction(&opcode_004E_hook, gvm.TranslateMemoryAddress(MA_OPCODE_004E)); } + CScriptEngine::CScriptEngine() + { + CustomMission = nullptr; + } + + CScriptEngine::~CScriptEngine() + { + TRACE("Unloading scripts..."); + RemoveAllCustomScripts(); + } + CleoSafeHeader safe_header; ThreadSavingInfo *safe_info; unsigned long *stopped_info; std::unique_ptr safe_info_utilizer; std::unique_ptr stopped_info_utilizer; + void CScriptEngine::Initialize() + { + if (CGame::bMissionPackGame == 0) // regular main game + { + //MainScriptFileDir = "0:\\data\\script"; // at user data TODO: enable when CLEO virtual paths available + MainScriptFileDir = CFileMgr::ms_rootDirName; + MainScriptFileDir += "data\\script"; + + MainScriptFileName = "main.scm"; + } + else // mission pack + { + //MainScriptFileDir = "1:\\MPACK\\MPACK"; // at user data TODO: enable when CLEO virtual paths available + MainScriptFileDir = CLEO::GetUserDirectory(); + MainScriptFileDir += "\\MPACK\\MPACK"; + MainScriptFileDir += std::to_string(CGame::bMissionPackGame); + + MainScriptFileName = "scr.scm"; + } + } + void CScriptEngine::LoadCustomScripts(bool load_mode) { char safe_name[MAX_PATH]; @@ -992,6 +1033,12 @@ namespace CLEO TRACE("Loading custom script %s...", szFileName); + // store script file directory and name + std::filesystem::path path = szFileName; + path =
std::filesystem::absolute(path); + scriptFileDir = path.parent_path().string(); + scriptFileName = path.filename().string(); + try { std::ifstream is; @@ -999,6 +1046,7 @@ namespace CLEO { if (!parent) throw std::logic_error("Trying to create external thread from label without parent thread"); + BaseIP = parent->GetBasePointer(); CurrentIP = parent->GetBasePointer() - label; memcpy(Name, parent->Name, sizeof(Name)); diff --git a/source/CScriptEngine.h b/source/CScriptEngine.h index 6142d96b..a1dac01e 100644 --- a/source/CScriptEngine.h +++ b/source/CScriptEngine.h @@ -29,6 +29,9 @@ namespace CLEO std::vector script_draws; std::vector script_texts; + std::string scriptFileDir; + std::string scriptFileName; + public: inline RwTexture* GetScriptTextureById(unsigned int id) { @@ -70,6 +73,14 @@ namespace CLEO void StoreScriptCustoms(); void RestoreScriptCustoms(); + + // absolute path to directory where script's source file is located + const char* GetScriptFileDir() const { return scriptFileDir.c_str(); } + void SetScriptFileDir(const char* directory) { scriptFileDir = directory; } + + // filename with type extension of script's source file + const char* GetScriptFileName() const { return scriptFileName.c_str(); } + void SetScriptFileName(const char* filename) { scriptFileName = filename; } }; class CScriptEngine : VInjectible @@ -81,34 +92,33 @@ namespace CLEO CCustomScript *CustomMission; CCustomScript * LoadScript(const char *szFilePath); - public: - static SCRIPT_VAR CleoVariables[0x400]; - inline CCustomScript * GetCustomMission() { return CustomMission; } - void LoadCustomScripts(bool bMode = false); - void SaveState(); - CRunningScript * FindScriptNamed(const char *); - CCustomScript * FindCustomScriptNamed(const char*); - void AddCustomScript(CCustomScript*); - void RemoveCustomScript(CCustomScript*); - void RemoveAllCustomScripts(); - void UnregisterAllScripts(); - void ReregisterAllScripts(); - inline size_t WorkingScriptsCount() { return 
CustomScripts.size(); } - virtual void Inject(CCodeInjector&); - - CScriptEngine() - { - CustomMission = nullptr; - } - - ~CScriptEngine() - { - TRACE("Unloading scripts..."); - RemoveAllCustomScripts(); - } + std::string MainScriptFileDir; + std::string MainScriptFileName; + + static SCRIPT_VAR CleoVariables[0x400]; + + CScriptEngine(); + ~CScriptEngine(); + + virtual void Inject(CCodeInjector&); + void Initialize(); // call after new game started + void LoadCustomScripts(bool bMode = false); + + void SaveState(); + + CRunningScript* FindScriptNamed(const char *); + CCustomScript* FindCustomScriptNamed(const char*); + void AddCustomScript(CCustomScript*); + void RemoveCustomScript(CCustomScript*); + void RemoveAllCustomScripts(); + void UnregisterAllScripts(); + void ReregisterAllScripts(); void DrawScriptStuff(char bBeforeFade); + + inline CCustomScript* GetCustomMission() { return CustomMission; } + inline size_t WorkingScriptsCount() { return CustomScripts.size(); } }; extern void(__thiscall * AddScriptToQueue)(CRunningScript *, CRunningScript **queue); diff --git a/source/CTheScripts.cpp b/source/CTheScripts.cpp new file mode 100644 index 00000000..bf510969 --- /dev/null +++ b/source/CTheScripts.cpp @@ -0,0 +1,54 @@ +#include "stdafx.h" +#include "CTheScripts.h" +#include "cleo.h" +//#include "CScriptEngine.h" + + +bool CRunningScript::IsCustom() +{ + auto cs = reinterpret_cast(this); + return cs->IsCustom(); +} + +const char* CRunningScript::GetScriptFileDir() +{ + if (IsCustom()) + { + return reinterpret_cast(this)->GetScriptFileDir(); + } + + return CLEO::GetInstance().ScriptEngine.MainScriptFileDir.c_str(); +} + +void CRunningScript::SetScriptFileDir(const char* directory) +{ + if (IsCustom()) + { + reinterpret_cast(this)->SetScriptFileDir(directory); + return; + } + + CLEO::GetInstance().ScriptEngine.MainScriptFileDir = directory; +} + +const char* CRunningScript::GetScriptFileName() +{ + if (IsCustom()) + { + return 
reinterpret_cast(this)->GetScriptFileName(); + } + + return CLEO::GetInstance().ScriptEngine.MainScriptFileName.c_str(); +} + +void CRunningScript::SetScriptFileName(const char* filename) +{ + if (IsCustom()) + { + reinterpret_cast(this)->SetScriptFileName(filename); + return; + } + + CLEO::GetInstance().ScriptEngine.MainScriptFileName = filename; +} + diff --git a/source/CTheScripts.h b/source/CTheScripts.h index e155bec2..3b189950 100644 --- a/source/CTheScripts.h +++ b/source/CTheScripts.h @@ -287,4 +287,14 @@ class CRunningScript bIsMission = 0; bWastedBustedCheck = 1; } + + bool IsCustom(); // is it CLEO Script? + + // absolute path to directory where script's source file is located + const char* GetScriptFileDir(); + void SetScriptFileDir(const char* directory); + + // filename with type extension of script's source file + const char* GetScriptFileName(); + void SetScriptFileName(const char* filename); }; diff --git a/source/cleo.cpp b/source/cleo.cpp index 95af0f14..e4db8cac 100644 --- a/source/cleo.cpp +++ b/source/cleo.cpp @@ -1,11 +1,77 @@ #include "stdafx.h" #include "cleo.h" + namespace CLEO { CCleoInstance CleoInstance; CCleoInstance& GetInstance() { return CleoInstance; } + std::string ResolvePath(const char* path, const char* workDir) + { + if (path == nullptr) + { + return {}; + } + + std::string result; + if (strlen(path) < 2 || path[1] != ':') // does not start with drive letter + { + if (workDir != nullptr) + { + result = std::string(workDir) + '\\' + path; + } + else + { + // application's current working dir. Can be set with 0A99 + result = std::string(MAX_PATH, '\0'); + _getcwd(result.data(), MAX_PATH); + result.resize(strlen(result.data())); + + result.push_back('\\'); + result.append(path); + } + } + else + { + result = path; + } + + return result; + + // TODO: CLEO virtual paths. 
Enable later + // predefined CLEO paths starting with '[digit]:' + /*if (result.length() < 2 || result[1] != ':' || + result[0] < '0' || result[0] > '3') // supported range + { + return result; // not predefined path prefix found + } + + std::string resolved(MAX_PATH, '\0'); + + if (result[0] == '1') // 1: game saves + { + // TODO: move logic from CScriptEngine::Initialize() + resolved += &result[2]; // original path without '1:' prefix + return resolved; + } + + // 0: game root directory + // TODO: move logic from CScriptEngine::Initialize() + + if (result[0] == '2') // 2: cleo directory + { + resolved += "\\cleo"; + } + else if (result[0] == '3') // 3: cleo modules directory + { + resolved += "\\cleo\\cleo_modules"; + } + + resolved += &result[2]; // original path without 'X:' prefix + return resolved;*/ + } + void __declspec(naked) CCleoInstance::OnUpdateGameLogics() { //GetInstance().UpdateGameLogics(); // ! diff --git a/source/cleo.h b/source/cleo.h index f36a9985..29093adb 100644 --- a/source/cleo.h +++ b/source/cleo.h @@ -6,6 +6,7 @@ #include "CDebug.h" #include "CDmaFix.h" #include "CGameMenu.h" +#include "CModuleSystem.h" #include "CPluginSystem.h" #include "CScriptEngine.h" #include "CCustomOpcodeSystem.h" @@ -21,6 +22,18 @@ namespace CLEO bool m_bStarted; public: + CDmaFix DmaFix; + CGameMenu GameMenu; + CCodeInjector CodeInjector; + CGameVersionManager VersionManager; + CScriptEngine ScriptEngine; + CTextManager TextManager; + CCustomOpcodeSystem OpcodeSystem; + CModuleSystem ModuleSystem; + CSoundSystem SoundSystem; + CPluginSystem PluginSystem; + //CLegacy Legacy; + CCleoInstance() { m_bStarted = false; @@ -37,6 +50,7 @@ namespace CLEO void Start() { CreateDirectory("cleo", NULL); + //CreateDirectory("cleo/cleo_modules", NULL); // TODO: enable if cleo_modules approved CreateDirectory("cleo/cleo_saves", NULL); CreateDirectory("cleo/cleo_text", NULL); CodeInjector.OpenReadWriteAccess(); // must do this earlier to ensure plugins write access on init @@
-54,20 +68,12 @@ namespace CLEO { if (!m_bStarted) return; } - - CDmaFix DmaFix; - CGameMenu GameMenu; - CCodeInjector CodeInjector; - CGameVersionManager VersionManager; - CScriptEngine ScriptEngine; - CTextManager TextManager; - CCustomOpcodeSystem OpcodeSystem; - CSoundSystem SoundSystem; - CPluginSystem PluginSystem; - //CLegacy Legacy; }; CCleoInstance& GetInstance(); + + // get absolute path + std::string ResolvePath(const char* path, const char* workDir = nullptr); } #endif From 9868b1b965a261b165a8b9c9e6d4664a423b51a5 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Wed, 4 Oct 2023 02:54:44 +0200 Subject: [PATCH 012/216] Paths handling updates (#103) * Added CLEO version info to log file. * Fixed problem with export references releasing. Review fixes. --- CHANGELOG.md | 11 +- CLEO4.vcxproj | 9 +- CLEO4.vcxproj.filters | 9 +- LICENSE.md | 2 +- cleo_plugins/CLEO_Plugins.sln | 37 +++++ .../FileSystemOperations.cpp | 27 +++- .../FileSystemOperations.vcxproj | 60 ++++---- .../FileSystemOperations.vcxproj.filters | 0 .../IniFiles/IniFiles.cpp | 74 ++++------ .../IniFiles/IniFiles.vcxproj | 64 +++++---- .../IniFiles/IniFiles.vcxproj.filters | 0 .../IntOperations/IntOperations.cpp | 9 +- .../IntOperations/IntOperations.vcxproj | 64 +++++---- .../IntOperations.vcxproj.filters | 0 cleo_sdk/CLEO.h | 12 +- .../FileSystemOperations.sln | 25 ---- demo_plugins/IniFiles/IniFiles.sln | 25 ---- demo_plugins/IntOperations/IntOperations.sln | 25 ---- pack_release.bat | 22 +++ source/CCustomOpcodeSystem.cpp | 130 ++++++++++-------- source/CCustomOpcodeSystem.h | 1 + source/CDebug.h | 6 + source/CGameMenu.cpp | 4 +- source/CGameVersionManager.cpp | 3 +- source/CLEO4.rc | Bin 5394 -> 5376 bytes source/CModuleSystem.cpp | 46 +++---- source/CModuleSystem.h | 4 +- source/CPluginSystem.h | 15 +- source/CScriptEngine.cpp | 32 ++--- source/CScriptEngine.h | 17 ++- source/CTextManager.cpp | 11 +- source/CTheScripts.cpp | 101 ++++++++++++-- source/CTheScripts.h | 13 +- source/CleoVersion.h 
| 16 +++ source/FileEnumerator.h | 33 +++-- source/cleo.cpp | 65 --------- source/cleo.def | 1 + source/cleo.h | 15 +- source/stdafx.h | 20 ++- 39 files changed, 543 insertions(+), 465 deletions(-) create mode 100644 cleo_plugins/CLEO_Plugins.sln rename {demo_plugins/FileSystemOperations => cleo_plugins}/FileSystemOperations/FileSystemOperations.cpp (91%) rename {demo_plugins/FileSystemOperations => cleo_plugins}/FileSystemOperations/FileSystemOperations.vcxproj (70%) rename {demo_plugins/FileSystemOperations => cleo_plugins}/FileSystemOperations/FileSystemOperations.vcxproj.filters (100%) rename {demo_plugins/IniFiles => cleo_plugins}/IniFiles/IniFiles.cpp (78%) rename {demo_plugins/IniFiles => cleo_plugins}/IniFiles/IniFiles.vcxproj (70%) rename {demo_plugins/IniFiles => cleo_plugins}/IniFiles/IniFiles.vcxproj.filters (100%) rename {demo_plugins/IntOperations => cleo_plugins}/IntOperations/IntOperations.cpp (98%) rename {demo_plugins/IntOperations => cleo_plugins}/IntOperations/IntOperations.vcxproj (69%) rename {demo_plugins/IntOperations => cleo_plugins}/IntOperations/IntOperations.vcxproj.filters (100%) delete mode 100644 demo_plugins/FileSystemOperations/FileSystemOperations.sln delete mode 100644 demo_plugins/IniFiles/IniFiles.sln delete mode 100644 demo_plugins/IntOperations/IntOperations.sln create mode 100644 pack_release.bat create mode 100644 source/CleoVersion.h diff --git a/CHANGELOG.md b/CHANGELOG.md index e0317dfe..1af2e8a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,11 @@ -## 4.4.5 - -- Added opcode 0DD5 (get_platform). +## 4.5.0 + +- added opcode 0DD5 (get_platform) +- updated project settings +- plugins moved to cleo\cleo_plugins directory +- rewritten Current Working Directory (editable with 0A99) handling. CWD changes no longer affect internal game's processes and are not globally shared among all scripts. +- introduced 'virtual absolute paths'.
Use prefix in file path strings to access predefined locations: "0:\" game root, "1:\" game save files directory, "2:\" this script file directory, "3:\" cleo folder, "4:\" cleo\cleo_modules +- new CLEO SDK export added: CLEO_ResolvePath ## 4.4.4 diff --git a/CLEO4.vcxproj b/CLEO4.vcxproj index 1cbb1ccb..b1bad3be 100644 --- a/CLEO4.vcxproj +++ b/CLEO4.vcxproj @@ -60,6 +60,7 @@ + @@ -110,15 +111,15 @@ - $(SolutionDir)output\$(Configuration)\ - $(SolutionDir)output\.obj\$(Configuration)\ + $(SolutionDir).output\$(Configuration)\ + $(SolutionDir).output\.obj\$(Configuration)\ CLEO .asi $(PLUGIN_SDK_DIR)\shared\;$(PLUGIN_SDK_DIR)\shared\game\;$(IncludePath) - $(SolutionDir)output\$(Configuration)\ - $(SolutionDir)output\.obj\$(Configuration)\ + $(SolutionDir).output\$(Configuration)\ + $(SolutionDir).output\.obj\$(Configuration)\ CLEO .asi $(PLUGIN_SDK_DIR)\shared\;$(PLUGIN_SDK_DIR)\shared\game\;$(IncludePath) diff --git a/CLEO4.vcxproj.filters b/CLEO4.vcxproj.filters index 7aa839a9..252f75ce 100644 --- a/CLEO4.vcxproj.filters +++ b/CLEO4.vcxproj.filters @@ -69,16 +69,16 @@ plugin_sdk - + plugin_sdk plugin_sdk - + plugin_sdk - + plugin_sdk @@ -143,6 +143,9 @@ source + + source + diff --git a/LICENSE.md b/LICENSE.md index c788f4df..cb79f086 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2007-2022, CLEO Library by Seemann, Alien and Deji +Copyright (c) 2007-2023, CLEO Library by Seemann, Alien, Deji and Miran Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/cleo_plugins/CLEO_Plugins.sln b/cleo_plugins/CLEO_Plugins.sln new file mode 100644 index 00000000..c59a49d9 --- /dev/null +++ b/cleo_plugins/CLEO_Plugins.sln @@ -0,0 +1,37 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.4.33213.308 +MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "FileSystemOperations", "FileSystemOperations\FileSystemOperations.vcxproj", "{B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "IniFiles", "IniFiles\IniFiles.vcxproj", "{6831362D-5226-4634-9DB4-266A1B6C3E6C}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "IntOperations", "IntOperations\IntOperations.vcxproj", "{68A434CF-6390-4FDF-9A15-36A8A9ECEAA9}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x86 = Debug|x86 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}.Debug|x86.ActiveCfg = Debug|Win32 + {B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}.Debug|x86.Build.0 = Debug|Win32 + {B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}.Release|x86.ActiveCfg = Release|Win32 + {B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}.Release|x86.Build.0 = Release|Win32 + {6831362D-5226-4634-9DB4-266A1B6C3E6C}.Debug|x86.ActiveCfg = Debug|Win32 + {6831362D-5226-4634-9DB4-266A1B6C3E6C}.Debug|x86.Build.0 = Debug|Win32 + {6831362D-5226-4634-9DB4-266A1B6C3E6C}.Release|x86.ActiveCfg = Release|Win32 + {6831362D-5226-4634-9DB4-266A1B6C3E6C}.Release|x86.Build.0 = Release|Win32 + {68A434CF-6390-4FDF-9A15-36A8A9ECEAA9}.Debug|x86.ActiveCfg = Debug|Win32 + {68A434CF-6390-4FDF-9A15-36A8A9ECEAA9}.Debug|x86.Build.0 = Debug|Win32 + {68A434CF-6390-4FDF-9A15-36A8A9ECEAA9}.Release|x86.ActiveCfg = Release|Win32 + {68A434CF-6390-4FDF-9A15-36A8A9ECEAA9}.Release|x86.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {A9E58D83-82BC-453A-95B3-2AE3449FE59F} + EndGlobalSection +EndGlobal diff --git a/demo_plugins/FileSystemOperations/FileSystemOperations/FileSystemOperations.cpp 
b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp similarity index 91% rename from demo_plugins/FileSystemOperations/FileSystemOperations/FileSystemOperations.cpp rename to cleo_plugins/FileSystemOperations/FileSystemOperations.cpp index eba8203f..a1a4a5d7 100644 --- a/demo_plugins/FileSystemOperations/FileSystemOperations/FileSystemOperations.cpp +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp @@ -3,11 +3,14 @@ using namespace plugin; -class FileSystemOperations { +class FileSystemOperations +{ public: - FileSystemOperations() { + FileSystemOperations() + { //check cleo version - if (CLEO_GetVersion() >= CLEO_VERSION) { + if (CLEO_GetVersion() >= CLEO_VERSION) + { //register opcodes CLEO_RegisterOpcode(0x0B00, Script_FS_DeleteFile); CLEO_RegisterOpcode(0x0B01, Script_FS_DeleteDirectory); @@ -27,8 +30,9 @@ class FileSystemOperations { ****************************************************************/ { char FilePath[MAX_PATH]; - CLEO_ReadStringOpcodeParam(thread, FilePath, sizeof(FilePath)); + CLEO_ResolvePath(thread, FilePath, sizeof(FilePath)); + CLEO_SetThreadCondResult(thread, DeleteFile(FilePath)); return OR_CONTINUE; @@ -90,6 +94,8 @@ class FileSystemOperations { BOOL result; CLEO_ReadStringOpcodeParam(thread, DirPath, sizeof(DirPath)); + CLEO_ResolvePath(thread, DirPath, sizeof(DirPath)); + DeleteAllInsideFlag = CLEO_GetIntOpcodeParam(thread); if (DeleteAllInsideFlag) @@ -119,7 +125,10 @@ class FileSystemOperations { BOOL result; CLEO_ReadStringOpcodeParam(thread, FilePath, sizeof(FilePath)); + CLEO_ResolvePath(thread, FilePath, sizeof(FilePath)); + CLEO_ReadStringOpcodeParam(thread, NewFilePath, sizeof(NewFilePath)); + CLEO_ResolvePath(thread, NewFilePath, sizeof(NewFilePath)); result = GetFileAttributes(FilePath) & FILE_ATTRIBUTE_DIRECTORY; if (!result) @@ -141,7 +150,10 @@ class FileSystemOperations { BOOL result; CLEO_ReadStringOpcodeParam(thread, FilePath, sizeof(FilePath)); + CLEO_ResolvePath(thread, FilePath, 
sizeof(FilePath)); + CLEO_ReadStringOpcodeParam(thread, NewFilePath, sizeof(NewFilePath)); + CLEO_ResolvePath(thread, NewFilePath, sizeof(NewFilePath)); result = GetFileAttributes(FilePath) & FILE_ATTRIBUTE_DIRECTORY; if (result) @@ -164,7 +176,10 @@ class FileSystemOperations { DWORD fattr; CLEO_ReadStringOpcodeParam(thread, FilePath, sizeof(FilePath)); + CLEO_ResolvePath(thread, FilePath, sizeof(FilePath)); + CLEO_ReadStringOpcodeParam(thread, NewFilePath, sizeof(NewFilePath)); + CLEO_ResolvePath(thread, NewFilePath, sizeof(NewFilePath)); if (result = CopyFile(FilePath, NewFilePath, FALSE)) { @@ -239,7 +254,11 @@ class FileSystemOperations { char NewFilePath[MAX_PATH]; CLEO_ReadStringOpcodeParam(thread, FilePath, sizeof(FilePath)); + CLEO_ResolvePath(thread, FilePath, sizeof(FilePath)); + CLEO_ReadStringOpcodeParam(thread, NewFilePath, sizeof(NewFilePath)); + CLEO_ResolvePath(thread, NewFilePath, sizeof(NewFilePath)); + CLEO_SetThreadCondResult(thread, CopyDir(FilePath, NewFilePath)); return OR_CONTINUE; diff --git a/demo_plugins/FileSystemOperations/FileSystemOperations/FileSystemOperations.vcxproj b/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj similarity index 70% rename from demo_plugins/FileSystemOperations/FileSystemOperations/FileSystemOperations.vcxproj rename to cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj index 1355535d..f9b721cf 100644 --- a/demo_plugins/FileSystemOperations/FileSystemOperations/FileSystemOperations.vcxproj +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj @@ -1,12 +1,12 @@  - - Release GTASA + + Release Win32 - - Debug GTASA + + Debug Win32 @@ -15,45 +15,45 @@ true Win32Proj FileSystemOperations - 10.0.18362.0 + 10.0 - + DynamicLibrary false MultiByte - v142 + v143 true - + DynamicLibrary true MultiByte - v142 + v143 - + - + - - $(SolutionDir)output\ - $(ProjectDir).obj\Release\ + + $(SolutionDir).output\ + $(ProjectDir).obj\$(Configuration)\ FileSystemOperations .cleo - - 
$(SolutionDir)output\ - $(ProjectDir).obj\Debug\ - FileSystemOperations_d + + $(SolutionDir).output\ + $(ProjectDir).obj\$(Configuration)\ + FileSystemOperations .cleo - + Level3 MaxSpeed @@ -61,37 +61,45 @@ true true MultiThreaded - $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\shared\;$(PLUGIN_SDK_DIR)\shared\game\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(CLEO_SDK_SA_DIR)\;%(AdditionalIncludeDirectories) _NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions) /Zc:threadSafeInit- %(AdditionalOptions) + $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared;$(PLUGIN_SDK_DIR)\shared\game;$(SolutionDir)..\cleo_sdk;%(AdditionalIncludeDirectories) true true - No + true UseLinkTimeCodeGeneration - $(PLUGIN_SDK_DIR)\output\lib\;$(CLEO_SDK_SA_DIR)\;%(AdditionalLibraryDirectories) + $(PLUGIN_SDK_DIR)\output\lib\;$(SolutionDir)..\cleo_sdk;%(AdditionalLibraryDirectories) cleo.lib;%(AdditionalDependencies) Windows + + taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" +xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" + - + Level3 Disabled true MultiThreadedDebug - $(PLUGIN_SDK_DIR)\shared\game\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared\;$(CLEO_SDK_SA_DIR)\;%(AdditionalIncludeDirectories) _DEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions) /Zc:threadSafeInit- %(AdditionalOptions) + $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared;$(PLUGIN_SDK_DIR)\shared\game;$(SolutionDir)..\cleo_sdk;%(AdditionalIncludeDirectories) - Debug + true Default - 
$(PLUGIN_SDK_DIR)\output\lib\;$(CLEO_SDK_SA_DIR)\;%(AdditionalLibraryDirectories) + $(PLUGIN_SDK_DIR)\output\lib\;$(SolutionDir)..\cleo_sdk;%(AdditionalLibraryDirectories) cleo.lib;%(AdditionalDependencies) Windows + + taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" +xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" + diff --git a/demo_plugins/FileSystemOperations/FileSystemOperations/FileSystemOperations.vcxproj.filters b/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj.filters similarity index 100% rename from demo_plugins/FileSystemOperations/FileSystemOperations/FileSystemOperations.vcxproj.filters rename to cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj.filters diff --git a/demo_plugins/IniFiles/IniFiles/IniFiles.cpp b/cleo_plugins/IniFiles/IniFiles.cpp similarity index 78% rename from demo_plugins/IniFiles/IniFiles/IniFiles.cpp rename to cleo_plugins/IniFiles/IniFiles.cpp index 4fe883fa..ac16ab91 100644 --- a/demo_plugins/IniFiles/IniFiles/IniFiles.cpp +++ b/cleo_plugins/IniFiles/IniFiles.cpp @@ -1,11 +1,14 @@ #include #include "CLEO.h" -class IniFiles { +class IniFiles +{ public: - IniFiles() { + IniFiles() + { //check cleo version - if (CLEO_GetVersion() >= CLEO_VERSION) { + if (CLEO_GetVersion() >= CLEO_VERSION) + { // register opcodes CLEO_RegisterOpcode(0x0AF0, Script_InifileGetInt); CLEO_RegisterOpcode(0x0AF1, Script_InifileWriteInt); @@ -18,30 +21,13 @@ class IniFiles { MessageBox(HWND_DESKTOP, "An incorrect version of CLEO was loaded.", "IniFiles.cleo", MB_ICONERROR); } - static char* MakeFullPath(char *path, char *dst) - { - if (path[1] != ':') - { - //get current working directory - GetCurrentDirectory(MAX_PATH, dst); - strcat(dst, "\\"); - strcat(dst, path); - } - else - { - strcpy(dst, path); - } - return dst; - } - static OpcodeResult WINAPI Script_InifileGetInt(CScriptThread* thread) /**************************************************************** Opcode Format 0AF0=4,%4d% = 
get_int_from_ini_file %1s% section %2s% key %3s% ****************************************************************/ { - char iniPath[MAX_PATH]; - char path[100]; + char path[MAX_PATH]; char sectionName[100]; char key[100]; int result; @@ -50,10 +36,9 @@ class IniFiles { CLEO_ReadStringPointerOpcodeParam(thread, sectionName, sizeof(sectionName)); CLEO_ReadStringPointerOpcodeParam(thread, key, sizeof(key)); - //if path is short, GetPrivateProfileInt() searches for the file in the Windows directory - MakeFullPath(path, iniPath); + CLEO_ResolvePath(thread, path, sizeof(path)); // convert to absolute path - result = GetPrivateProfileInt(sectionName, key, 0x80000000, iniPath); + result = GetPrivateProfileInt(sectionName, key, 0x80000000, path); CLEO_SetIntOpcodeParam(thread, result); CLEO_SetThreadCondResult(thread, result != 0x80000000); @@ -66,8 +51,7 @@ class IniFiles { 0AF1=4,write_int %1d% to_ini_file %2s% section %3s% key %4s% ****************************************************************/ { - char iniPath[MAX_PATH]; - char path[100]; + char path[MAX_PATH]; char sectionName[100]; char key[100]; DWORD value; @@ -79,10 +63,9 @@ class IniFiles { CLEO_ReadStringPointerOpcodeParam(thread, sectionName, sizeof(sectionName)); CLEO_ReadStringPointerOpcodeParam(thread, key, sizeof(key)); - //if path is short, WritePrivateProfileString() searches for the file in the Windows directory - MakeFullPath(path, iniPath); + CLEO_ResolvePath(thread, path, sizeof(path)); // convert to absolute path - result = WritePrivateProfileString(sectionName, key, _itoa(value, strValue, 10), iniPath); + result = WritePrivateProfileString(sectionName, key, _itoa(value, strValue, 10), path); CLEO_SetThreadCondResult(thread, result); return OR_CONTINUE; @@ -94,8 +77,7 @@ class IniFiles { 0AF2=4,%4d% = get_float_from_ini_file %1s% section %2s% key %3s% ****************************************************************/ { - char iniPath[MAX_PATH]; - char path[100]; + char path[MAX_PATH]; char 
sectionName[100]; char key[100]; float value = 0.0f; @@ -106,10 +88,9 @@ class IniFiles { CLEO_ReadStringPointerOpcodeParam(thread, sectionName, sizeof(sectionName)); CLEO_ReadStringPointerOpcodeParam(thread, key, sizeof(key)); - //if path is short, GetPrivateProfileString() searches for the file in the Windows directory - MakeFullPath(path, iniPath); + CLEO_ResolvePath(thread, path, sizeof(path)); // convert to absolute path - result = GetPrivateProfileString(sectionName, key, NULL, strValue, sizeof(strValue), iniPath); + result = GetPrivateProfileString(sectionName, key, NULL, strValue, sizeof(strValue), path); if (result) { value = (float)atof(strValue); @@ -129,8 +110,7 @@ class IniFiles { 0AF3=4,write_float %1d% to_ini_file %2s% section %3s% key %4s% ****************************************************************/ { - char iniPath[MAX_PATH]; - char path[100]; + char path[MAX_PATH]; char sectionName[100]; char key[100]; float value; @@ -142,12 +122,11 @@ class IniFiles { CLEO_ReadStringPointerOpcodeParam(thread, sectionName, sizeof(sectionName)); CLEO_ReadStringPointerOpcodeParam(thread, key, sizeof(key)); - //if path is short, WritePrivateProfileString() searches for the file in the Windows directory - MakeFullPath(path, iniPath); + CLEO_ResolvePath(thread, path, sizeof(path)); // convert to absolute path sprintf(strValue, "%g", value); - result = WritePrivateProfileString(sectionName, key, strValue, iniPath); + result = WritePrivateProfileString(sectionName, key, strValue, path); CLEO_SetThreadCondResult(thread, result); return OR_CONTINUE; @@ -159,8 +138,7 @@ class IniFiles { 0AF4=4,%4d% = read_string_from_ini_file %1s% section %2s% key %3s% ****************************************************************/ { - char iniPath[MAX_PATH]; - char path[100]; + char path[MAX_PATH]; char sectionName[100]; char key[100]; char strValue[100]; @@ -171,10 +149,9 @@ class IniFiles { CLEO_ReadStringPointerOpcodeParam(thread, sectionName, sizeof(sectionName)); 
CLEO_ReadStringPointerOpcodeParam(thread, key, sizeof(key)); - //if path is short, GetPrivateProfileString() searches for the file in the Windows directory - MakeFullPath(path, iniPath); + CLEO_ResolvePath(thread, path, sizeof(path)); // convert to absolute path - result = GetPrivateProfileString(sectionName, key, NULL, strValue, sizeof(strValue), iniPath); + result = GetPrivateProfileString(sectionName, key, NULL, strValue, sizeof(strValue), path); if (result) { switch (CLEO_GetOperandType(thread)) @@ -204,12 +181,10 @@ class IniFiles { 0AF5=4,write_string %1s% to_ini_file %2s% section %3s% key %4s% ****************************************************************/ { - char iniPath[MAX_PATH]; - char path[100]; + char path[MAX_PATH]; char sectionName[100]; char key[100]; char strValue[100]; - char *strptr; BOOL result; CLEO_ReadStringPointerOpcodeParam(thread, strValue, sizeof(strValue)); @@ -217,10 +192,9 @@ class IniFiles { CLEO_ReadStringPointerOpcodeParam(thread, sectionName, sizeof(sectionName)); CLEO_ReadStringPointerOpcodeParam(thread, key, sizeof(key)); - //if path is short, WritePrivateProfileString() searches for the file in the Windows directory - MakeFullPath(path, iniPath); + CLEO_ResolvePath(thread, path, sizeof(path)); // convert to absolute path - result = WritePrivateProfileString(sectionName, key, strValue, iniPath); + result = WritePrivateProfileString(sectionName, key, strValue, path); CLEO_SetThreadCondResult(thread, result); diff --git a/demo_plugins/IniFiles/IniFiles/IniFiles.vcxproj b/cleo_plugins/IniFiles/IniFiles.vcxproj similarity index 70% rename from demo_plugins/IniFiles/IniFiles/IniFiles.vcxproj rename to cleo_plugins/IniFiles/IniFiles.vcxproj index 44b07f64..c199d9fe 100644 --- a/demo_plugins/IniFiles/IniFiles/IniFiles.vcxproj +++ b/cleo_plugins/IniFiles/IniFiles.vcxproj @@ -1,59 +1,59 @@  - - Release GTASA + + Release Win32 - - Debug GTASA + + Debug Win32 - {B212DDA4-2A8E-45B2-914D-7BEEB31D06B1} + 
{6831362D-5226-4634-9DB4-266A1B6C3E6C} true Win32Proj IniFiles - 10.0.18362.0 + 10.0 - + DynamicLibrary false MultiByte - v142 + v143 true - + DynamicLibrary true MultiByte - v142 + v143 - + - + - - $(SolutionDir)output\ - $(ProjectDir).obj\Release\ + + $(SolutionDir).output\ + $(ProjectDir).obj\$(Configuration)\ IniFiles .cleo - - $(SolutionDir)output\ - $(ProjectDir).obj\Debug\ - IniFiles_d + + $(SolutionDir).output\ + $(ProjectDir).obj\$(Configuration)\ + IniFiles .cleo - + Level3 MaxSpeed @@ -61,37 +61,47 @@ true true MultiThreaded - $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(CLEO_SDK_SA_DIR)\;%(AdditionalIncludeDirectories) + $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared;$(PLUGIN_SDK_DIR)\shared\game;$(SolutionDir)..\cleo_sdk _NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions) /Zc:threadSafeInit- %(AdditionalOptions) + stdcpp17 true true - No + true UseLinkTimeCodeGeneration - $(PLUGIN_SDK_DIR)\output\lib\;$(CLEO_SDK_SA_DIR)\;%(AdditionalLibraryDirectories) + $(PLUGIN_SDK_DIR)\output\lib\;$(SolutionDir)..\cleo_sdk;%(AdditionalLibraryDirectories) cleo.lib;%(AdditionalDependencies) Windows + + taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" +xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" + - + Level3 Disabled true MultiThreadedDebug - $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\shared\;$(PLUGIN_SDK_DIR)\shared\game\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(CLEO_SDK_SA_DIR)\;%(AdditionalIncludeDirectories) + $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared;$(PLUGIN_SDK_DIR)\shared\game;$(SolutionDir)..\cleo_sdk _DEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San 
Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions) /Zc:threadSafeInit- %(AdditionalOptions) + stdcpp17 - Debug + true Default - $(PLUGIN_SDK_DIR)\output\lib\;$(CLEO_SDK_SA_DIR)\;%(AdditionalLibraryDirectories) + $(PLUGIN_SDK_DIR)\output\lib\;$(SolutionDir)..\cleo_sdk;%(AdditionalLibraryDirectories) cleo.lib;%(AdditionalDependencies) Windows + + taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" +xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" + diff --git a/demo_plugins/IniFiles/IniFiles/IniFiles.vcxproj.filters b/cleo_plugins/IniFiles/IniFiles.vcxproj.filters similarity index 100% rename from demo_plugins/IniFiles/IniFiles/IniFiles.vcxproj.filters rename to cleo_plugins/IniFiles/IniFiles.vcxproj.filters diff --git a/demo_plugins/IntOperations/IntOperations/IntOperations.cpp b/cleo_plugins/IntOperations/IntOperations.cpp similarity index 98% rename from demo_plugins/IntOperations/IntOperations/IntOperations.cpp rename to cleo_plugins/IntOperations/IntOperations.cpp index f61628e3..14fb132a 100644 --- a/demo_plugins/IntOperations/IntOperations/IntOperations.cpp +++ b/cleo_plugins/IntOperations/IntOperations.cpp @@ -3,11 +3,14 @@ using namespace plugin; -class IntOperations { +class IntOperations +{ public: - IntOperations() { + IntOperations() + { //check cleo version - if (CLEO_GetVersion() >= CLEO_VERSION) { + if (CLEO_GetVersion() >= CLEO_VERSION) + { //register opcodes CLEO_RegisterOpcode(0x0B10, Script_IntOp_AND); CLEO_RegisterOpcode(0x0B11, Script_IntOp_OR); diff --git a/demo_plugins/IntOperations/IntOperations/IntOperations.vcxproj b/cleo_plugins/IntOperations/IntOperations.vcxproj similarity index 69% rename from demo_plugins/IntOperations/IntOperations/IntOperations.vcxproj rename to cleo_plugins/IntOperations/IntOperations.vcxproj index 96006976..72198218 100644 --- a/demo_plugins/IntOperations/IntOperations/IntOperations.vcxproj +++ 
b/cleo_plugins/IntOperations/IntOperations.vcxproj @@ -1,59 +1,59 @@  - - Release GTASA + + Release Win32 - - Debug GTASA + + Debug Win32 - {B212DDA4-2A8E-45B2-914D-7BEEB31D06B1} + {68A434CF-6390-4FDF-9A15-36A8A9ECEAA9} true Win32Proj IntOperations - 10.0.18362.0 + 10.0 - + DynamicLibrary false MultiByte - v142 + v143 true - + DynamicLibrary true MultiByte - v142 + v143 - + - + - - $(SolutionDir)output\ - $(ProjectDir).obj\Release\ + + $(SolutionDir).output\ + $(ProjectDir).obj\$(Configuration)\ IntOperations .cleo - - $(SolutionDir)output\ - $(ProjectDir).obj\Debug\ - IntOperations_d + + $(SolutionDir).output\ + $(ProjectDir).obj\$(Configuration)\ + IntOperations .cleo - + Level3 MaxSpeed @@ -61,37 +61,47 @@ true true MultiThreaded - $(PLUGIN_SDK_DIR)\shared\;$(PLUGIN_SDK_DIR)\shared\game\;$(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(CLEO_SDK_SA_DIR)\;%(AdditionalIncludeDirectories) + $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared;$(PLUGIN_SDK_DIR)\shared\game;$(SolutionDir)..\cleo_sdk _NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions) /Zc:threadSafeInit- %(AdditionalOptions) + stdcpp17 true true - No + true UseLinkTimeCodeGeneration - $(PLUGIN_SDK_DIR)\output\lib\;$(CLEO_SDK_SA_DIR)\;%(AdditionalLibraryDirectories) + $(PLUGIN_SDK_DIR)\output\lib\;$(SolutionDir)..\cleo_sdk;%(AdditionalLibraryDirectories) cleo.lib;%(AdditionalDependencies) Windows + + taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" +xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" + - + Level3 Disabled true MultiThreadedDebug - $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\shared\;$(PLUGIN_SDK_DIR)\shared\game\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(CLEO_SDK_SA_DIR)\;%(AdditionalIncludeDirectories) + 
$(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared;$(PLUGIN_SDK_DIR)\shared\game;$(SolutionDir)..\cleo_sdk _DEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions) /Zc:threadSafeInit- %(AdditionalOptions) + stdcpp17 - Debug + true Default - $(PLUGIN_SDK_DIR)\output\lib\;$(CLEO_SDK_SA_DIR)\;%(AdditionalLibraryDirectories) + $(PLUGIN_SDK_DIR)\output\lib\;$(SolutionDir)..\cleo_sdk;%(AdditionalLibraryDirectories) cleo.lib;%(AdditionalDependencies) Windows + + taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" +xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" + diff --git a/demo_plugins/IntOperations/IntOperations/IntOperations.vcxproj.filters b/cleo_plugins/IntOperations/IntOperations.vcxproj.filters similarity index 100% rename from demo_plugins/IntOperations/IntOperations/IntOperations.vcxproj.filters rename to cleo_plugins/IntOperations/IntOperations.vcxproj.filters diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index 7eaa39db..96f82583 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -6,8 +6,7 @@ #include -#define CLEO_VERSION 0x04040400 -#define CLEO_VERSIONTEXT "4.4.4" +#define CLEO_VERSION 0x04050000 //result of CLEO_GetGameVersion() #define GV_US10 0 //1.0 us @@ -41,6 +40,12 @@ typedef union #define globalVarSString 0x0A //s$ #define localVarSString 0x0B //@s +// CLEO virtual paths prefixes. 
Expandable with CLEO_ResolvePath +const char DIR_GAME[] = "0:"; // game root directory +const char DIR_USER[] = "1:"; // game save directory +const char DIR_SCRIPT[] = "2:"; // current script directory +const char DIR_CLEO[] = "3:"; // game\cleo directory +const char DIR_MODULES[] = "4:"; // game\cleo\modules directory typedef int SCRIPT_HANDLE; typedef SCRIPT_HANDLE HANDLE_ACTOR, ACTOR, HACTOR, PED, HPED, HANDLE_PED; @@ -140,6 +145,9 @@ void WINAPI CLEO_AddScriptDeleteDelegate(FuncScriptDeleteDelegateT func); void WINAPI CLEO_RemoveScriptDeleteDelegate(FuncScriptDeleteDelegateT func); +// convert to absolute file path +void WINAPI CLEO_ResolvePath(CScriptThread* thread, char* inOutPath, DWORD pathMaxLen); + #ifdef __cplusplus } #endif //__cplusplus diff --git a/demo_plugins/FileSystemOperations/FileSystemOperations.sln b/demo_plugins/FileSystemOperations/FileSystemOperations.sln deleted file mode 100644 index 04ec507c..00000000 --- a/demo_plugins/FileSystemOperations/FileSystemOperations.sln +++ /dev/null @@ -1,25 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 15 -VisualStudioVersion = 15.0.27004.2002 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "FileSystemOperations", "FileSystemOperations\FileSystemOperations.vcxproj", "{B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|GTASA = Debug|GTASA - Release|GTASA = Release|GTASA - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}.Debug|GTASA.ActiveCfg = Debug GTASA|Win32 - {B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}.Debug|GTASA.Build.0 = Debug GTASA|Win32 - {B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}.Release|GTASA.ActiveCfg = Release GTASA|Win32 - {B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}.Release|GTASA.Build.0 = Release GTASA|Win32 - EndGlobalSection - GlobalSection(SolutionProperties) = 
preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {C3CF1A15-418C-4682-8B41-D48FAAF388C6} - EndGlobalSection -EndGlobal diff --git a/demo_plugins/IniFiles/IniFiles.sln b/demo_plugins/IniFiles/IniFiles.sln deleted file mode 100644 index eb9218f8..00000000 --- a/demo_plugins/IniFiles/IniFiles.sln +++ /dev/null @@ -1,25 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 15 -VisualStudioVersion = 15.0.27004.2002 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "IniFiles", "IniFiles\IniFiles.vcxproj", "{B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|GTASA = Debug|GTASA - Release|GTASA = Release|GTASA - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}.Debug|GTASA.ActiveCfg = Debug GTASA|Win32 - {B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}.Debug|GTASA.Build.0 = Debug GTASA|Win32 - {B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}.Release|GTASA.ActiveCfg = Release GTASA|Win32 - {B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}.Release|GTASA.Build.0 = Release GTASA|Win32 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {D64AF46C-52A6-418A-9785-9FCA2928CC2C} - EndGlobalSection -EndGlobal diff --git a/demo_plugins/IntOperations/IntOperations.sln b/demo_plugins/IntOperations/IntOperations.sln deleted file mode 100644 index 7b106090..00000000 --- a/demo_plugins/IntOperations/IntOperations.sln +++ /dev/null @@ -1,25 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 15 -VisualStudioVersion = 15.0.27004.2002 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "IntOperations", 
"IntOperations\IntOperations.vcxproj", "{B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|GTASA = Debug|GTASA - Release|GTASA = Release|GTASA - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}.Debug|GTASA.ActiveCfg = Debug GTASA|Win32 - {B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}.Debug|GTASA.Build.0 = Debug GTASA|Win32 - {B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}.Release|GTASA.ActiveCfg = Release GTASA|Win32 - {B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}.Release|GTASA.Build.0 = Release GTASA|Win32 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {E5CD51E9-C44F-4BEB-993A-8EEDB0ACEAE0} - EndGlobalSection -EndGlobal diff --git a/pack_release.bat b/pack_release.bat new file mode 100644 index 00000000..701de58d --- /dev/null +++ b/pack_release.bat @@ -0,0 +1,22 @@ +@echo off + +SET zip="C:\Program Files\7-Zip\7z.exe" + +echo Preparing GTA SA CLEO +FOR /F "USEBACKQ" %%F IN (`powershell -NoLogo -NoProfile -Command ^(Get-Item ".output\Release\CLEO.asi"^).VersionInfo.FileVersion`) DO (SET fileVersion=%%F) +echo Detected version: %fileVersion% +SET outputFile=".\CLEO.SA_v%fileVersion%.zip" +if exist %outputFile% del %outputFile% /q + +%zip% a -tzip %outputFile% ".\Changelog.md" -bb2 | findstr "+" +%zip% rn %outputFile% "Changelog.md" "cleo_readme\Changelog.txt" -bso0 + +%zip% a -tzip %outputFile% ".\Readme.md" -bb2 | findstr "+" +%zip% rn %outputFile% "Readme.md" "cleo_readme\Readme.txt" -bso0 + +%zip% a -tzip %outputFile% ".\.output\Release\CLEO.asi" -bb2 | findstr "+" + +%zip% a -tzip %outputFile% "cleo_plugins\.output\*.cleo" -bb2 | findstr "+" +%zip% rn %outputFile% "cleo_plugins\.output" "cleo\cleo_plugins" -bso0 + +pause diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 
e135a5b3..3f251dbd 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -843,6 +843,7 @@ namespace CLEO { static const size_t store_size = 0x400; static ScmFunction *Store[store_size]; static size_t allocationPlace; // contains an index of last allocated object + void* moduleExportRef = 0; // modules switching. Points to modules baseIP in case if this is export call std::string savedScriptFileDir; // modules switching std::string savedScriptFileName; // modules switching @@ -1043,12 +1044,10 @@ namespace CLEO { //0A92=-1,create_custom_thread %1d% OpcodeResult __stdcall opcode_0A92(CRunningScript *thread) { - const char *script_name = readString(thread); - TRACE("[0A92] Starting new custom script %s from thread named %s", script_name, thread->GetName()); - char cwd[MAX_PATH]; - _getcwd(cwd, sizeof(cwd)); - _chdir(cleo_dir); - auto cs = new CCustomScript(script_name); + auto filename = thread->ResolvePath(readString(thread), DIR_CLEO); // legacy: default search location is game\cleo directory + TRACE("[0A92] Starting new custom script %s from thread named %s", filename.c_str(), thread->GetName()); + + auto cs = new CCustomScript(filename.c_str()); SetScriptCondResult(thread, cs && cs->IsOK()); if (cs && cs->IsOK()) { @@ -1059,9 +1058,9 @@ namespace CLEO { { if (cs) delete cs; SkipUnusedParameters(thread); - TRACE("[0A92] Failed to load script '%s' from script '%s'.", script_name, thread->GetName()); + TRACE("[0A92] Failed to load script '%s' from script '%s'.", filename.c_str(), thread->GetName()); } - _chdir(cwd); + return OR_CONTINUE; } @@ -1081,14 +1080,11 @@ namespace CLEO { //0A94=-1,create_custom_mission %1d% OpcodeResult __stdcall opcode_0A94(CRunningScript *thread) { - char script_name[MAX_PATH]; - readString(thread, script_name); - strcat(script_name, ".cm"); // add custom mission extension - TRACE("[0A94] Starting new custom mission %s from thread named %s", script_name, thread->GetName()); - char cwd[MAX_PATH]; - _getcwd(cwd, 
sizeof(cwd)); - _chdir(cleo_dir); - auto cs = new CCustomScript(script_name, true); + auto filename = thread->ResolvePath(readString(thread), DIR_CLEO); // legacy: default search location is game\cleo directory + filename += ".cm"; // add custom mission extension + TRACE("[0A94] Starting new custom mission %s from thread named %s", filename.c_str(), thread->GetName()); + + auto cs = new CCustomScript(filename.c_str(), true); SetScriptCondResult(thread, cs && cs->IsOK()); if (cs && cs->IsOK()) { @@ -1102,9 +1098,9 @@ namespace CLEO { { if (cs) delete cs; SkipUnusedParameters(thread); - TRACE("[0A94] Failed to load mission '%s' from script '%s'.", script_name, thread->GetName()); + TRACE("[0A94] Failed to load mission '%s' from script '%s'.", filename.c_str(), thread->GetName()); } - _chdir(cwd); + return OR_CONTINUE; } @@ -1148,20 +1144,17 @@ namespace CLEO { auto paramType = *thread->GetBytePointer(); if (paramType >= 1 && paramType <= 8) { - // integer param + // numbered predefined paths DWORD param; *thread >> param; - //_chdir(param ? 
GetUserDirectory() : ""); - if (param) ChangeToUserDir(); - else ChangeToProgramDir(""); + + std::string path = std::to_string(param); + path += ":"; + thread->SetWorkDir(path.c_str()); } else { - // string param - char buf[MAX_PATH]; - std::fill(buf, buf + sizeof(buf), '\0'); - GetScriptStringParam(thread, buf, (BYTE)sizeof(buf)); - _chdir(buf); + thread->SetWorkDir(readString(thread)); } return OR_CONTINUE; } @@ -1169,7 +1162,7 @@ namespace CLEO { //0A9A=3,%3d% = openfile %1d% mode %2d% // IF and SET OpcodeResult __stdcall opcode_0A9A(CRunningScript *thread) { - const char *fname = readString(thread); + auto filename = thread->ResolvePath(readString(thread)); auto paramType = *thread->GetBytePointer(); char mode[0x10]; @@ -1195,7 +1188,7 @@ namespace CLEO { GetScriptStringParam(thread, mode, sizeof(mode)); } - if (auto hfile = open_file(fname, mode, bLegacyMode)) + if (auto hfile = open_file(filename.c_str(), mode, bLegacyMode)) { GetInstance().OpcodeSystem.m_hFiles.insert(hfile); @@ -1208,9 +1201,6 @@ namespace CLEO { SetScriptCondResult(thread, false); } - char szBlah[MAX_PATH]; - _getcwd(szBlah, MAX_PATH); - return OR_CONTINUE; } @@ -1295,7 +1285,9 @@ namespace CLEO { //0AA2=2,%2h% = load_library %1d% // IF and SET OpcodeResult __stdcall opcode_0AA2(CRunningScript *thread) { - auto libHandle = LoadLibrary(readString(thread)); + auto filename = thread->ResolvePath(readString(thread)); + + auto libHandle = LoadLibrary(filename.c_str()); *thread << libHandle; SetScriptCondResult(thread, libHandle != nullptr); if (libHandle) GetInstance().OpcodeSystem.m_hNativeLibs.insert(libHandle); @@ -1596,7 +1588,9 @@ namespace CLEO { //0AAB=1, file_exists %1d% OpcodeResult __stdcall opcode_0AAB(CRunningScript *thread) { - DWORD fAttr = GetFileAttributes(readString(thread)); + auto filename = thread->ResolvePath(readString(thread)); + + DWORD fAttr = GetFileAttributes(filename.c_str()); SetScriptCondResult(thread, (fAttr != INVALID_FILE_ATTRIBUTES) && !(fAttr & 
FILE_ATTRIBUTE_DIRECTORY)); return OR_CONTINUE; } @@ -1604,7 +1598,9 @@ namespace CLEO { //0AAC=2, %2d% = load_audiostream %1d% // IF and SET OpcodeResult __stdcall opcode_0AAC(CRunningScript *thread) { - auto stream = GetInstance().SoundSystem.LoadStream(readString(thread)); + auto filename = thread->ResolvePath(readString(thread)); + + auto stream = GetInstance().SoundSystem.LoadStream(filename.c_str()); *thread << stream; SetScriptCondResult(thread, stream != nullptr); return OR_CONTINUE; @@ -1662,7 +1658,6 @@ namespace CLEO { //0AB1=-1,call_scm_func %1p% OpcodeResult __stdcall opcode_0AB1(CRunningScript *thread) { - BYTE* base = nullptr; int label = 0; char* moduleTxt = nullptr; @@ -1676,7 +1671,6 @@ namespace CLEO { case DT_LVAR: case DT_VAR_ARRAY: case DT_LVAR_ARRAY: - base = thread->GetBasePointer(); // current script *thread >> label; break; @@ -1702,26 +1696,30 @@ namespace CLEO { return OR_INTERRUPT; } } + + ScmFunction* scmFunc = new ScmFunction(thread); // parse module reference text if (moduleTxt != nullptr) { - std::string str(moduleTxt); + std::string_view str(moduleTxt); auto pos = str.find('@'); if (pos == str.npos) { std::string err(128, '\0'); - sprintf(err.data(), "Invalid module reference '%s' in 0AB1 opcode in script '%s'", str.c_str(), thread->GetScriptFileName()); + sprintf(err.data(), "Invalid module reference '%s' in 0AB1 opcode in script '%s'", moduleTxt, thread->GetScriptFileName()); Error(err.data()); return OR_INTERRUPT; } - str[pos] = '\0'; // split into two texts + std::string_view strExport = str.substr(0, pos); + std::string_view strModule = str.substr(pos + 1); - // get module's absolute path - std::string modulePath(&str[pos + 1]); - modulePath = ResolvePath(modulePath.c_str(), thread->GetScriptFileDir()); + // get module's file absolute path + auto modulePath = std::string(strModule); + modulePath = thread->ResolvePath(modulePath.c_str(), DIR_SCRIPT); // by default search relative to current script location - auto scriptRef = 
GetInstance().ModuleSystem.GetExport(modulePath.c_str(), &str[0]); + // get export reference + auto scriptRef = GetInstance().ModuleSystem.GetExport(modulePath, strExport); if (!scriptRef.Valid()) { std::string err(128, '\0'); @@ -1729,16 +1727,17 @@ namespace CLEO { Error(err.data()); return OR_INTERRUPT; } + scmFunc->moduleExportRef = scriptRef.base; // to be released on return - base = (BYTE*)scriptRef.base; + thread->SetScriptFileDir(std::filesystem::path(modulePath).parent_path().string().c_str()); + thread->SetScriptFileName(std::filesystem::path(modulePath).filename().string().c_str()); + thread->SetBaseIp(scriptRef.base); label = scriptRef.offset; } DWORD nParams = 0; if(*thread->GetBytePointer()) *thread >> nParams; - ScmFunction* scmFunc = new ScmFunction(thread); - static SCRIPT_VAR arguments[32]; SCRIPT_VAR* locals = thread->IsMission() ? missionLocals : thread->GetVarPtr(); SCRIPT_VAR* localsEnd = locals + 32; @@ -1804,7 +1803,6 @@ namespace CLEO { } // jump to label - thread->SetBaseIp(base); // script space ThreadJump(thread, label); // script offset return OR_CONTINUE; } @@ -1812,8 +1810,6 @@ namespace CLEO { //0AB2=-1,ret OpcodeResult __stdcall opcode_0AB2(CRunningScript *thread) { - GetInstance().ModuleSystem.ReleaseModuleRef((char*)thread->GetBasePointer()); // release module if one used - ScmFunction *scmFunc = ScmFunction::Store[reinterpret_cast(thread)->GetScmFunction()]; DWORD nRetParams = 0; @@ -1823,6 +1819,10 @@ namespace CLEO { scmFunc->Return(thread); if (nRetParams) SetScriptParams(thread, nRetParams); SkipUnusedParameters(thread); + + if(scmFunc->moduleExportRef != nullptr) + GetInstance().ModuleSystem.ReleaseModuleRef((char*)scmFunc->moduleExportRef); // exiting export - release module + delete scmFunc; return OR_CONTINUE; } @@ -1830,8 +1830,7 @@ namespace CLEO { //0AB3=2,var %1d% = %2d% OpcodeResult __stdcall opcode_0AB3(CRunningScript *thread) { - DWORD varId, - value; + DWORD varId, value; *thread >> varId >> value; 
GetInstance().ScriptEngine.CleoVariables[varId].dwParam = value; return OR_CONTINUE; @@ -2917,17 +2916,17 @@ extern "C" CRunningScript* WINAPI CLEO_CreateCustomScript(CRunningScript* fromThread, const char *script_name, int label) { + auto filename = fromThread->ResolvePath(script_name, DIR_CLEO); // legacy: default search location is game\cleo directory + if (label != 0) // create from label { - TRACE("Starting new custom script from thread named %s label %i", script_name, label); + TRACE("Starting new custom script from thread named %s label %i", filename.c_str(), label); } else { - TRACE("Starting new custom script %s", script_name); + TRACE("Starting new custom script %s", filename.c_str()); } - char cwd[MAX_PATH]; - _getcwd(cwd, sizeof(cwd)); - _chdir(cleo_dir); + // if "label == 0" then "script_name" need to be the file name auto cs = new CCustomScript(script_name, false, reinterpret_cast(fromThread), label); if (fromThread) SetScriptCondResult(fromThread, cs && cs->IsOK()); @@ -2941,8 +2940,9 @@ extern "C" if (cs) delete cs; if (fromThread) SkipUnusedParameters(fromThread); TRACE("Failed to load script '%s'.", script_name); + return nullptr; } - _chdir(cwd); + return cs; } @@ -2961,4 +2961,18 @@ extern "C" scriptDeleteDelegate -= func; } + void WINAPI CLEO_ResolvePath(CRunningScript* thread, char* inOutPath, DWORD pathMaxLen) + { + if (thread == nullptr || inOutPath == nullptr || pathMaxLen < 1) + { + return; // invalid param + } + + auto resolved = thread->ResolvePath(inOutPath); + + if (resolved.length() >= pathMaxLen) + resolved.resize(pathMaxLen - 1); // and terminator character + + std::memcpy(inOutPath, resolved.c_str(), resolved.length() + 1); // with terminator + } } \ No newline at end of file diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index 362d8878..c66f02d3 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -103,5 +103,6 @@ namespace CLEO RwTexture* WINAPI 
CLEO_GetScriptTextureById(CRunningScript* thread, int id); HSTREAM WINAPI CLEO_GetInternalAudioStream(CRunningScript* thread, CAudioStream* stream); CRunningScript* WINAPI CLEO_CreateCustomScript(CRunningScript* fromThread, const char* fileName, int label); + void WINAPI CLEO_ResolvePath(CRunningScript* thread, char* inOutPath, DWORD pathMaxLen); } } diff --git a/source/CDebug.h b/source/CDebug.h index 5084cb35..4aafa23f 100644 --- a/source/CDebug.h +++ b/source/CDebug.h @@ -24,6 +24,12 @@ class CDebug CDebug() : m_hFile(szLogFileName) { Trace("Log started."); + +#ifdef _DEBUG + Trace("CLEO v%s DEBUG", CLEO_VERSION_DOT_STR); +#elif + Trace("CLEO v%s", CLEO_VERSION_DOT_STR); +#endif } ~CDebug() diff --git a/source/CGameMenu.cpp b/source/CGameMenu.cpp index 3a6abec8..d12f54b3 100644 --- a/source/CGameMenu.cpp +++ b/source/CGameMenu.cpp @@ -6,8 +6,6 @@ namespace CLEO { - const char CLEO_VERSION_STR[] = VERSION_STRING; - void(__cdecl * TextDraw) (float x, float y, const char* text); void(__cdecl * SetTextAlign) (BYTE nAlign); void(__cdecl * SetTextFont) (BYTE nFont); @@ -66,7 +64,7 @@ namespace CLEO auto cs_count = GetInstance().ScriptEngine.WorkingScriptsCount(); auto plugin_count = GetInstance().PluginSystem.GetNumPlugins(); std::ostringstream cleo_text; - cleo_text << "CLEO v" << CLEO_VERSION_STR; + cleo_text << "CLEO v" << CLEO_VERSION_DOT_STR; #ifdef _DEBUG cleo_text << " DEBUG"; #endif diff --git a/source/CGameVersionManager.cpp b/source/CGameVersionManager.cpp index eca415ff..7fcd200f 100644 --- a/source/CGameVersionManager.cpp +++ b/source/CGameVersionManager.cpp @@ -1,5 +1,6 @@ #include "stdafx.h" #include "CGameVersionManager.h" +#include "CleoVersion.h" namespace CLEO { @@ -148,7 +149,7 @@ namespace CLEO int __stdcall CLEO_GetVersion() { - return VERSION_LONG; + return CLEO_VERSION; } } } diff --git a/source/CLEO4.rc b/source/CLEO4.rc index 9177193ad1933f4753fcb213bb2e4a6f0b1a6940..0527cdbcb93278c443e7f5fb28f795f3776e8ae0 100644 GIT binary patch delta 182 
zcmbQF)u1(D!Q^j@BBIU=ISi=``3zw|vWTIWArr{X+c_BbQoaRfPt5Ri$Q@QfFX#%pTUJ8l);%HgdvQ<703@}@C1ta z0cByTLA1fwyaB$$CZKmwB|2q^KQD4eXzE4lfd HfDj7+n^GIN diff --git a/source/CModuleSystem.cpp b/source/CModuleSystem.cpp index 284f4f59..44542254 100644 --- a/source/CModuleSystem.cpp +++ b/source/CModuleSystem.cpp @@ -1,6 +1,8 @@ #include "stdafx.h" #include "cleo.h" #include "CModuleSystem.h" +#include "CFileMgr.h" +#include "FileEnumerator.h" #include #include @@ -13,21 +15,20 @@ void CModuleSystem::Clear() modules.clear(); } -const ScriptDataRef CModuleSystem::GetExport(const char* moduleName, const char* exportName) +const ScriptDataRef CModuleSystem::GetExport(std::string modulePath, std::string_view exportName) { - std::string path(moduleName); - NormalizePath(path); + NormalizePath(modulePath); - auto& it = modules.find(path); + auto& it = modules.find(modulePath); if (it == modules.end()) // module not loaded yet? { - if (!LoadFile(path.c_str())) + if (!LoadFile(modulePath.c_str())) { return {}; } // check if available now - it = modules.find(path); + it = modules.find(modulePath); if (it == modules.end()) { return {}; @@ -35,7 +36,7 @@ const ScriptDataRef CModuleSystem::GetExport(const char* moduleName, const char* } auto& module = it->second; - auto e = module.GetExport(exportName); + auto e = module.GetExport(std::string(exportName)); if (e.Valid()) { module.refCount++; @@ -59,31 +60,19 @@ bool CModuleSystem::LoadFile(const char* path) bool CModuleSystem::LoadDirectory(const char* path) { bool result = true; - - auto p = CLEO::ResolvePath(path); // actual absolute path - try - { - for (auto& it : std::filesystem::recursive_directory_iterator(p)) - { - auto& filePath = it.path(); - if (filePath.extension() == ".s") - { - result &= LoadFile(filePath.string().c_str()); - } - } - } - catch (const std::exception& ex) + FilesWalk(path, ".s", [&](const char* filename) { - TRACE("Error while iterating CLEO Modules: %s", ex.what()); - return false; - } + result &= 
LoadFile(filename); + }); return result; } bool CModuleSystem::LoadCleoModules() { - return LoadDirectory("3:\\"); // cleo\cleo_modules + std::string path = CFileMgr::ms_rootDirName; + path += "\\cleo\\cleo_modules"; + return LoadDirectory(path.c_str()); } void CLEO::CModuleSystem::AddModuleRef(const char* baseIP) @@ -359,12 +348,11 @@ bool CModuleSystem::CModule::LoadFromFile(const char* path) return true; } -const ScriptDataRef CModuleSystem::CModule::GetExport(const char* name) +const ScriptDataRef CModuleSystem::CModule::GetExport(std::string name) { - auto normalized = std::string(name); - ModuleExport::NormalizeName(normalized); + ModuleExport::NormalizeName(name); - auto& it = exports.find(normalized); + auto& it = exports.find(name); if (it == exports.end()) { return {}; diff --git a/source/CModuleSystem.h b/source/CModuleSystem.h index 286bb4c6..f385a4d4 100644 --- a/source/CModuleSystem.h +++ b/source/CModuleSystem.h @@ -24,7 +24,7 @@ namespace CLEO void Clear(); // registers module reference. 
Needs to be released with ReleaseModuleRef - const ScriptDataRef GetExport(const char* moduleName, const char* exportName); + const ScriptDataRef GetExport(std::string modulePath, std::string_view exportName); bool LoadFile(const char* const path); // single file bool LoadDirectory(const char* const path); // all modules in directory @@ -72,7 +72,7 @@ namespace CLEO void Clear(); const char* GetFilepath() const; bool LoadFromFile(const char* path); - const ScriptDataRef GetExport(const char* name); + const ScriptDataRef GetExport(std::string name); }; std::map modules; diff --git a/source/CPluginSystem.h b/source/CPluginSystem.h index 168659a8..f69bd7b3 100644 --- a/source/CPluginSystem.h +++ b/source/CPluginSystem.h @@ -14,16 +14,15 @@ namespace CLEO public: CPluginSystem() { - TRACE("Unloading plugins..."); - FilesWalk("cleo/*.cleo", [this](const char *libName) { - char libPath[MAX_PATH] = "cleo/"; - strcat(libPath, libName); - TRACE("Loading plugin %s", libPath); - HMODULE hlib = LoadLibrary(libPath); + TRACE("Loading plugins..."); + FilesWalk("cleo\\cleo_plugins", ".cleo", [this](const char *filename) + { + TRACE("Loading plugin %s", filename); + HMODULE hlib = LoadLibrary(filename); if (!hlib) { char message[MAX_PATH + 40]; - sprintf(message, "Error loading plugin %s", libPath); + sprintf(message, "Error loading plugin %s", filename); Warning(message); } else plugins.push_back(hlib); @@ -35,6 +34,6 @@ namespace CLEO std::for_each(plugins.begin(), plugins.end(), FreeLibrary); } - inline size_t GetNumPlugins() { return plugins.size(); } + inline size_t GetNumPlugins() { return plugins.size(); } }; } diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index de5e8d44..a9b7662f 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -692,21 +692,17 @@ namespace CLEO { if (CGame::bMissionPackGame == 0) // regular main game { - //MainScriptFileDir = "0:\\data\\script"; // at user data TODO: enable when CLEO virtual paths available - 
MainScriptFileDir = CFileMgr::ms_rootDirName; - MainScriptFileDir += "data\\script"; - + MainScriptFileDir = std::string(DIR_GAME) + "\\data\\script"; MainScriptFileName = "main.scm"; } else // mission pack { - //MainScriptFileDir = "1:\\MPACK\\MPACK"; // at user data TODO: enable when CLEO virtual paths available - MainScriptFileDir = CLEO::GetUserDirectory(); - MainScriptFileDir += "\\MPACK\\MPACK"; + MainScriptFileDir = std::string(DIR_USER) + "\\MPACK\\MPACK"; MainScriptFileDir += std::to_string(CGame::bMissionPackGame); - MainScriptFileName = "scr.scm"; } + + MainScriptCurWorkDir = DIR_GAME; } void CScriptEngine::LoadCustomScripts(bool load_mode) @@ -765,25 +761,25 @@ namespace CLEO memset(CleoVariables, 0, sizeof(CleoVariables)); } - char cwd[MAX_PATH]; - _getcwd(cwd, sizeof(cwd)); - _chdir(cleo_dir); + // [game root]\cleo + std::string scriptsDir = CFileMgr::ms_rootDirName; + scriptsDir += "\\cleo"; TRACE("Searching for cleo scripts"); - FilesWalk(cs_mask, [this](const char *filename) { + FilesWalk(scriptsDir.c_str(), cs_ext, [this](const char *filename) { LoadScript(filename); }); - FilesWalk(cs4_mask, [this](const char *filename) { + + FilesWalk(scriptsDir.c_str(), cs4_ext, [this](const char *filename) { auto cs = LoadScript(filename); if (cs) cs->SetCompatibility(CLEO_VER_4); }); - FilesWalk(cs3_mask, [this](const char *filename) { + + FilesWalk(scriptsDir.c_str(), cs3_ext, [this](const char *filename) { auto cs = LoadScript(filename); if (cs) cs->SetCompatibility(CLEO_VER_3); }); - - _chdir(cwd); } CCustomScript * CScriptEngine::LoadScript(const char * szFilePath) @@ -1023,7 +1019,7 @@ namespace CLEO CCustomScript::CCustomScript(const char *szFileName, bool bIsMiss, CCustomScript *parent, int label) : CRunningScript(), bSaveEnabled(false), bOK(false), LastSearchPed(0), LastSearchCar(0), LastSearchObj(0), - CompatVer(CLEO_VERSION) + CompatVer(CLEO_VER_CUR) { IsCustom(1); bIsMission = bUseMissionCleanup = bIsMiss; @@ -1039,6 +1035,8 @@ namespace CLEO 
scriptFileDir = path.parent_path().string(); scriptFileName = path.filename().string(); + workDir = "0:"; // game root + try { std::ifstream is; diff --git a/source/CScriptEngine.h b/source/CScriptEngine.h index a1dac01e..9dc31aac 100644 --- a/source/CScriptEngine.h +++ b/source/CScriptEngine.h @@ -4,10 +4,9 @@ namespace CLEO { - const char cleo_dir[] = "./cleo"; - const char cs_mask[] = "./*.cs"; - const char cs4_mask[] = "./*.cs4"; - const char cs3_mask[] = "./*.cs3"; + const char cs_ext[] = ".cs"; + const char cs4_ext[] = ".cs4"; + const char cs3_ext[] = ".cs3"; class CCustomScript : public CRunningScript { @@ -31,6 +30,7 @@ namespace CLEO std::string scriptFileDir; std::string scriptFileName; + std::string workDir; public: inline RwTexture* GetScriptTextureById(unsigned int id) @@ -50,8 +50,8 @@ namespace CLEO inline void SetNotFlag(bool b) { NotFlag = b; } inline char GetNotFlag() { return NotFlag; } inline void IsCustom(bool b) { MemWrite(reinterpret_cast(this) + 0xDF, b); } - inline bool IsCustom() { return MemRead(reinterpret_cast(this) + 0xDF); } - inline bool IsOK() { return bOK; } + inline bool IsCustom() const { return MemRead(reinterpret_cast(this) + 0xDF); } + inline bool IsOK() const { return bOK; } inline void enable_saving(bool en = true) { bSaveEnabled = en; } inline void SetCompatibility(CLEO_Version ver) { CompatVer = ver; } inline CLEO_Version GetCompatibility() { return CompatVer; } @@ -81,6 +81,10 @@ namespace CLEO // filename with type extension of script's source file const char* GetScriptFileName() const { return scriptFileName.c_str(); } void SetScriptFileName(const char* filename) { scriptFileName = filename; } + + // current working directory of this script. 
Can be changed ith 0A99 + const char* GetWorkDir() const { return workDir.c_str(); } + void SetWorkDir(const char* directory) { workDir = directory; } }; class CScriptEngine : VInjectible @@ -95,6 +99,7 @@ namespace CLEO public: std::string MainScriptFileDir; std::string MainScriptFileName; + std::string MainScriptCurWorkDir; static SCRIPT_VAR CleoVariables[0x400]; diff --git a/source/CTextManager.cpp b/source/CTextManager.cpp index 245b4a74..3a4ba40d 100644 --- a/source/CTextManager.cpp +++ b/source/CTextManager.cpp @@ -101,17 +101,11 @@ namespace CLEO return szResult; } - const char fxt_mask[] = "./*.fxt"; - const char fxt_dir[] = "./cleo/cleo_text"; - CTextManager::CTextManager() : fxts(1, crc32FromUpcaseStdString) { - char cwd[MAX_PATH]; - _getcwd(cwd, sizeof(cwd)); - _chdir(fxt_dir); - // parse FXT files - FilesWalk(fxt_mask, [this](const char *fname) { + FilesWalk("cleo\\cleo_text", ".fxt", [this](const char *fname) + { TRACE("Parsing FXT file %s", fname); try { @@ -126,7 +120,6 @@ namespace CLEO Warning(ss.str().c_str()); } }); - _chdir(cwd); } const char* CTextManager::Get(const char* key) diff --git a/source/CTheScripts.cpp b/source/CTheScripts.cpp index bf510969..c2326a48 100644 --- a/source/CTheScripts.cpp +++ b/source/CTheScripts.cpp @@ -1,54 +1,129 @@ #include "stdafx.h" #include "CTheScripts.h" #include "cleo.h" -//#include "CScriptEngine.h" +#include "CFileMgr.h" +using namespace CLEO; -bool CRunningScript::IsCustom() + +bool CRunningScript::IsCustom() const { - auto cs = reinterpret_cast(this); + auto cs = reinterpret_cast(this); return cs->IsCustom(); } -const char* CRunningScript::GetScriptFileDir() +const char* CRunningScript::GetScriptFileDir() const { if (IsCustom()) { - return reinterpret_cast(this)->GetScriptFileDir(); + return reinterpret_cast(this)->GetScriptFileDir(); } - return CLEO::GetInstance().ScriptEngine.MainScriptFileDir.c_str(); + return GetInstance().ScriptEngine.MainScriptFileDir.c_str(); } void 
CRunningScript::SetScriptFileDir(const char* directory) { if (IsCustom()) { - reinterpret_cast(this)->SetScriptFileDir(directory); + reinterpret_cast(this)->SetScriptFileDir(directory); return; } - CLEO::GetInstance().ScriptEngine.MainScriptFileDir = directory; + GetInstance().ScriptEngine.MainScriptFileDir = directory; } -const char* CRunningScript::GetScriptFileName() +const char* CRunningScript::GetScriptFileName() const { if (IsCustom()) { - return reinterpret_cast(this)->GetScriptFileName(); + return reinterpret_cast(this)->GetScriptFileName(); } - return CLEO::GetInstance().ScriptEngine.MainScriptFileName.c_str(); + return GetInstance().ScriptEngine.MainScriptFileName.c_str(); } void CRunningScript::SetScriptFileName(const char* filename) { if (IsCustom()) { - reinterpret_cast(this)->SetScriptFileName(filename); + reinterpret_cast(this)->SetScriptFileName(filename); return; } - CLEO::GetInstance().ScriptEngine.MainScriptFileName = filename; + GetInstance().ScriptEngine.MainScriptFileName = filename; +} + +const char* CRunningScript::GetWorkDir() const +{ + if (IsCustom()) + { + return reinterpret_cast(this)->GetWorkDir(); + } + + return GetInstance().ScriptEngine.MainScriptCurWorkDir.c_str(); +} + +void CRunningScript::SetWorkDir(const char* directory) +{ + if (IsCustom()) + { + reinterpret_cast(this)->SetWorkDir(directory); + return; + } + + GetInstance().ScriptEngine.MainScriptCurWorkDir = directory; +} + +std::string CRunningScript::ResolvePath(const char* path, const char* customWorkDir) const +{ + if (path == nullptr) + { + return {}; + } + + std::string result; + if (strlen(path) < 2 || path[1] != ':') // does not start with drive letter + { + result = (customWorkDir != nullptr) ? 
customWorkDir : GetWorkDir(); + result.push_back('\\'); + result += path; + } + else + { + result = path; + } + + // predefined CLEO paths starting with '[digit]:' + if (result.length() < 2 || result[1] != ':' || + result[0] < DIR_GAME[0] || result[0] > DIR_MODULES[0]) // supported range + { + return result; // not predefined path prefix found + } + + if (result[0] == DIR_USER[0]) // saves/settings location + { + return std::string(GetUserDirectory()) + &result[2]; // original path without '1:' prefix; + } + + if (result[0] == DIR_SCRIPT[0]) // current script location + { + return std::string(GetScriptFileDir()) + &result[2]; // original path without '2:' prefix; + } + + // game root directory + std::string resolved = CFileMgr::ms_rootDirName; + + if (result[0] == DIR_CLEO[0]) // cleo directory + { + resolved += "\\cleo"; + } + else if (result[0] == DIR_MODULES[0]) // cleo modules directory + { + resolved += "\\cleo\\cleo_modules"; + } + + resolved += &result[2]; // original path without 'X:' prefix + return resolved; } diff --git a/source/CTheScripts.h b/source/CTheScripts.h index 3b189950..0ee03462 100644 --- a/source/CTheScripts.h +++ b/source/CTheScripts.h @@ -288,13 +288,20 @@ class CRunningScript bWastedBustedCheck = 1; } - bool IsCustom(); // is it CLEO Script? + bool IsCustom() const; // is it CLEO Script? // absolute path to directory where script's source file is located - const char* GetScriptFileDir(); + const char* GetScriptFileDir() const; void SetScriptFileDir(const char* directory); // filename with type extension of script's source file - const char* GetScriptFileName(); + const char* GetScriptFileName() const; void SetScriptFileName(const char* filename); + + // current working directory of this script. 
Can be changed ith 0A99 + const char* GetWorkDir() const; + void SetWorkDir(const char* directory); + + // convert to absolute path + std::string ResolvePath(const char* path, const char* customWorkDir = nullptr) const; }; diff --git a/source/CleoVersion.h b/source/CleoVersion.h new file mode 100644 index 00000000..8e8cf00e --- /dev/null +++ b/source/CleoVersion.h @@ -0,0 +1,16 @@ +#pragma once + +#define CLEO_VERSION_MAIN 4 +#define CLEO_VERSION_MAJOR 5 +#define CLEO_VERSION_MINOR 0 + +#define CLEO_VERSION ((CLEO_VERSION_MAIN << 24)|(CLEO_VERSION_MAJOR << 16)|(CLEO_VERSION_MINOR << 8)) + +#define CLEO_VERSION_DOT CLEO_VERSION_MAIN.CLEO_VERSION_MAJOR.CLEO_VERSION_MINOR +#define CLEO_VERSION_COMMA CLEO_VERSION_MAIN,CLEO_VERSION_MAJOR,CLEO_VERSION_MINOR + +#define __TO_STR(x) #x +#define TO_STR(x) __TO_STR(x) + +#define CLEO_VERSION_DOT_STR TO_STR(CLEO_VERSION_DOT) +#define CLEO_VERSION_COMMA_STR TO_STR(CLEO_VERSION_COMMA) diff --git a/source/FileEnumerator.h b/source/FileEnumerator.h index a61baffd..0592a585 100644 --- a/source/FileEnumerator.h +++ b/source/FileEnumerator.h @@ -1,18 +1,31 @@ #pragma once +#include template -void FilesWalk(const char *file_mask, T cb) +void FilesWalk(const char* directory, const char* extension, T callback) { - HANDLE hSearch = NULL; - WIN32_FIND_DATA wfd; - memset(&wfd, 0, sizeof(WIN32_FIND_DATA)); + try + { + for (auto& it : std::filesystem::directory_iterator(directory)) + { + if (it.is_regular_file()) + { + auto& filePath = it.path(); - if ((hSearch = FindFirstFile(file_mask, &wfd)) == INVALID_HANDLE_VALUE) return; + if (extension != nullptr) + { + if (_stricmp(filePath.extension().string().c_str(), extension) != 0) + { + continue; + } + } - do + callback(std::filesystem::absolute(filePath).string().c_str()); + } + } + } + catch (const std::exception& ex) { - if (!(wfd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) cb(wfd.cFileName); - } while (FindNextFile(hSearch, &wfd)); - - FindClose(hSearch); + TRACE("Error while iterating 
directory: %s", ex.what()); + } } diff --git a/source/cleo.cpp b/source/cleo.cpp index e4db8cac..5e0350c7 100644 --- a/source/cleo.cpp +++ b/source/cleo.cpp @@ -7,71 +7,6 @@ namespace CLEO CCleoInstance CleoInstance; CCleoInstance& GetInstance() { return CleoInstance; } - std::string ResolvePath(const char* path, const char* workDir) - { - if (path == nullptr) - { - return {}; - } - - std::string result; - if (strlen(path) < 2 || path[1] != ':') // does not start with drive letter - { - if (workDir != nullptr) - { - result = std::string(workDir) + '\\' + path; - } - else - { - // application's current working dir. Can be set with 0A99 - result = std::string(MAX_PATH, '\0'); - _getcwd(result.data(), MAX_PATH); - result.resize(strlen(result.data())); - - result.push_back('\\'); - result.append(path); - } - } - else - { - result = path; - } - - return result; - - // TODO: CLEO virtual paths. Enable later - // predefined CLEO paths starting with '[digit]:' - /*if (result.length() < 2 || result[1] != ':' || - result[0] < '0' || result[0] > '3') // supported range - { - return result; // not predefined path prefix found - } - - std::string resolved(MAX_PATH, '\0'); - - if (result[0] == '1') // 1: game saves - { - // TODO: move logic from CScriptEngine::Initialize() - resolved += &result[2]; // original path without '1:' prefix - return resolved; - } - - // 0: game root directory - // TODO: move logic from CScriptEngine::Initialize() - - if (result[0] == '2') // 2: cleo directory - { - resolved += "\\cleo"; - } - else if (result[0] == '3') // 3: cleo modules directory - { - resolved += "\\cleo\\cleo_modules"; - } - - resolved += &result[2]; // original path without 'X:' prefix - return resolved;*/ - } - void __declspec(naked) CCleoInstance::OnUpdateGameLogics() { //GetInstance().UpdateGameLogics(); // ! 
diff --git a/source/cleo.def b/source/cleo.def index c6383e8e..dc63e3b9 100644 --- a/source/cleo.def +++ b/source/cleo.def @@ -27,3 +27,4 @@ EXPORTS _CLEO_GetLastCreatedCustomScript@0 @24 _CLEO_AddScriptDeleteDelegate@4 @25 _CLEO_RemoveScriptDeleteDelegate@4 @26 + _CLEO_ResolvePath@12 @27 diff --git a/source/cleo.h b/source/cleo.h index 29093adb..4eaae708 100644 --- a/source/cleo.h +++ b/source/cleo.h @@ -6,6 +6,7 @@ #include "CDebug.h" #include "CDmaFix.h" #include "CGameMenu.h" +#include "CleoVersion.h" #include "CModuleSystem.h" #include "CPluginSystem.h" #include "CScriptEngine.h" @@ -17,6 +18,13 @@ namespace CLEO { + // CLEO virtual paths prefixes. Expandable with ResolvePath + const char DIR_GAME[] = "0:"; // game root directory + const char DIR_USER[] = "1:"; // game save directory + const char DIR_SCRIPT[] = "2:"; // current script directory + const char DIR_CLEO[] = "3:"; // game\cleo directory + const char DIR_MODULES[] = "4:"; // game\cleo\modules directory + class CCleoInstance { bool m_bStarted; @@ -50,10 +58,10 @@ namespace CLEO void Start() { CreateDirectory("cleo", NULL); - //CreateDirectory("cleo/cleo_modules", NULL); // TODO: enbale if cleo_modules approved + CreateDirectory("cleo/cleo_modules", NULL); CreateDirectory("cleo/cleo_saves", NULL); CreateDirectory("cleo/cleo_text", NULL); - CodeInjector.OpenReadWriteAccess(); // must do this earlier to ensure plugins write access on init + CodeInjector.OpenReadWriteAccess(); // must do this earlier to ensure plugins write access on init GameMenu.Inject(CodeInjector); DmaFix.Inject(CodeInjector); UpdateGameLogics = VersionManager.TranslateMemoryAddress(MA_UPDATE_GAME_LOGICS_FUNCTION); @@ -71,9 +79,6 @@ namespace CLEO }; CCleoInstance& GetInstance(); - - // get absolute path - std::string ResolvePath(const char* path, const char* workDir = nullptr); } #endif diff --git a/source/stdafx.h b/source/stdafx.h index c16fa8b5..be7fe2ce 100644 --- a/source/stdafx.h +++ b/source/stdafx.h @@ -27,23 +27,21 @@ 
#include #include +#include "CleoVersion.h" #include "CTheScripts.h" enum CLEO_Version { - //CLEO_VER_1 = 0x0100, - //CLEO_VER_2 = 0x0200, - CLEO_VER_3 = 0x0300, - CLEO_VER_4_MIN = 0x0400, - CLEO_VER_4_2 = 0x0402, - CLEO_VER_4_3 = 0x0403, - CLEO_VER_4_4 = 0x0404, + //CLEO_VER_1 = 0x01000000, + //CLEO_VER_2 = 0x02000000, + CLEO_VER_3 = 0x03000000, + CLEO_VER_4_MIN = 0x04000000, + CLEO_VER_4_2 = 0x04020000, + CLEO_VER_4_3 = 0x04030000, + CLEO_VER_4_4 = 0x04040000, CLEO_VER_4 = CLEO_VER_4_4, - - CLEO_VERSION = CLEO_VER_4, + CLEO_VER_CUR = CLEO_VERSION, }; -#define VERSION_LONG 0x04040400 -#define VERSION_STRING "4.4.4" #define CPOOL_USE_HANDLE_ACCESS From ada6d50eec31aee11cdbaee39c44ba9bc591e7d9 Mon Sep 17 00:00:00 2001 From: Seemann Date: Tue, 3 Oct 2023 21:20:13 -0400 Subject: [PATCH 013/216] Update CDebug.h --- source/CDebug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/CDebug.h b/source/CDebug.h index 4aafa23f..4f64a3c6 100644 --- a/source/CDebug.h +++ b/source/CDebug.h @@ -27,7 +27,7 @@ class CDebug #ifdef _DEBUG Trace("CLEO v%s DEBUG", CLEO_VERSION_DOT_STR); -#elif +#else Trace("CLEO v%s", CLEO_VERSION_DOT_STR); #endif } From 02617e0c5ad81b579194eeb2711d445a5d6728d6 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Fri, 6 Oct 2023 16:20:08 +0200 Subject: [PATCH 014/216] Removed duplicated declarations of multiple objects. (#106) CLEO.h from sdk used in CLEO project. 
--- CLEO4.vcxproj | 11 +- CLEO4.vcxproj.filters | 16 +- .../FileSystemOperations.cpp | 1 + cleo_plugins/IniFiles/IniFiles.cpp | 10 +- cleo_plugins/IntOperations/IntOperations.cpp | 1 + cleo_sdk/CLEO.cpp | 126 +++++++ cleo_sdk/CLEO.h | 297 ++++++++++++----- source/CCodeInjector.cpp | 2 +- source/CCustomOpcodeSystem.cpp | 84 ++--- source/CCustomOpcodeSystem.h | 32 -- source/CDebug.h | 4 +- source/CDmaFix.cpp | 2 +- source/CGameMenu.cpp | 4 +- source/CGameVersionManager.cpp | 13 +- source/CGameVersionManager.h | 11 - source/CLEO4.rc | Bin 5376 -> 5454 bytes source/CModuleSystem.cpp | 2 +- source/CScriptEngine.cpp | 106 +++++- source/CScriptEngine.h | 19 +- source/CSoundSystem.cpp | 2 +- source/CTextManager.cpp | 2 +- source/CTheScripts.cpp | 129 -------- source/CTheScripts.h | 307 ------------------ source/{cleo.cpp => CleoBase.cpp} | 3 +- source/{cleo.h => CleoBase.h} | 12 +- source/CleoVersion.h | 16 - source/dllmain.cpp | 2 +- source/stdafx.h | 2 +- 28 files changed, 528 insertions(+), 688 deletions(-) create mode 100644 cleo_sdk/CLEO.cpp delete mode 100644 source/CTheScripts.cpp delete mode 100644 source/CTheScripts.h rename source/{cleo.cpp => CleoBase.cpp} (94%) rename source/{cleo.h => CleoBase.h} (82%) delete mode 100644 source/CleoVersion.h diff --git a/CLEO4.vcxproj b/CLEO4.vcxproj index b1bad3be..7d39d45a 100644 --- a/CLEO4.vcxproj +++ b/CLEO4.vcxproj @@ -11,6 +11,10 @@ + + NotUsing + NotUsing + NotUsing @@ -36,13 +40,12 @@ - + - @@ -59,15 +62,13 @@ - - + - diff --git a/CLEO4.vcxproj.filters b/CLEO4.vcxproj.filters index 252f75ce..cbc426e8 100644 --- a/CLEO4.vcxproj.filters +++ b/CLEO4.vcxproj.filters @@ -33,7 +33,7 @@ source - + source @@ -60,9 +60,6 @@ source - - source - plugin_sdk @@ -81,6 +78,9 @@ plugin_sdk + + cleo_sdk + @@ -104,7 +104,7 @@ source - + source @@ -122,9 +122,6 @@ source - - source - source @@ -143,9 +140,6 @@ source - - source - diff --git a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp 
b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp index a1a4a5d7..e05de685 100644 --- a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp @@ -1,6 +1,7 @@ #include "plugin.h" #include "CLEO.h" +using namespace CLEO; using namespace plugin; class FileSystemOperations diff --git a/cleo_plugins/IniFiles/IniFiles.cpp b/cleo_plugins/IniFiles/IniFiles.cpp index ac16ab91..9b17459a 100644 --- a/cleo_plugins/IniFiles/IniFiles.cpp +++ b/cleo_plugins/IniFiles/IniFiles.cpp @@ -1,6 +1,8 @@ #include #include "CLEO.h" +using namespace CLEO; + class IniFiles { public: @@ -156,10 +158,10 @@ class IniFiles { switch (CLEO_GetOperandType(thread)) { - case globalVarVString: - case localVarVString: - case globalVarSString: - case localVarSString: + case DT_VAR_STRING: + case DT_LVAR_STRING: + case DT_VAR_TEXTLABEL: + case DT_LVAR_TEXTLABEL: CLEO_WriteStringOpcodeParam(thread, strValue); break; default: diff --git a/cleo_plugins/IntOperations/IntOperations.cpp b/cleo_plugins/IntOperations/IntOperations.cpp index 14fb132a..bea244a6 100644 --- a/cleo_plugins/IntOperations/IntOperations.cpp +++ b/cleo_plugins/IntOperations/IntOperations.cpp @@ -1,6 +1,7 @@ #include "plugin.h" #include "CLEO.h" +using namespace CLEO; using namespace plugin; class IntOperations diff --git a/cleo_sdk/CLEO.cpp b/cleo_sdk/CLEO.cpp new file mode 100644 index 00000000..03bef1ce --- /dev/null +++ b/cleo_sdk/CLEO.cpp @@ -0,0 +1,126 @@ +#include "CLEO.h" + +namespace CLEO +{ + +#ifdef __cplusplus +CRunningScript::CRunningScript() +{ + strcpy(Name, "noname"); + BaseIP = 0; + Previous = 0; + Next = 0; + CurrentIP = 0; + memset(Stack, 0, sizeof(Stack)); + SP = 0; + WakeTime = 0; + bIsActive = 0; + bCondResult = 0; + bUseMissionCleanup = 0; + bIsExternal = 0; + bTextBlockOverride = 0; + bExternalType = -1; + memset(LocalVar, 0, sizeof(LocalVar)); + LogicalOp = eLogicalOperation::NONE; + NotFlag = 0; + bWastedBustedCheck = 1; + 
bWastedOrBusted = 0; + SceneSkipIP = 0; + bIsMission = 0; + bIsCustom = 0; +} + +bool CRunningScript::IsActive() const { return bIsActive; } + +bool CRunningScript::IsExternal() const { return bIsExternal; } + +bool CRunningScript::IsMission() const { return bIsMission; } + +bool CRunningScript::IsCustom() const { return bIsCustom; } + +const char* CRunningScript::GetName() const { return Name; } + +BYTE* CRunningScript::GetBasePointer() const { return (BYTE*)BaseIP; } + +BYTE* CRunningScript::GetBytePointer() const { return CurrentIP; } + +void CRunningScript::SetIp(void* ip) { CurrentIP = (BYTE*)ip; } + +void CRunningScript::SetBaseIp(void* ip) { BaseIP = ip; } + +CRunningScript* CRunningScript::GetNext() const { return Next; } + +CRunningScript* CRunningScript::GetPrev() const { return Previous; } + +void CRunningScript::SetIsExternal(bool b) { bIsExternal = b; } + +void CRunningScript::SetActive(bool b) { bIsActive = b; } + +void CRunningScript::SetNext(CRunningScript* v) { Next = v; } + +void CRunningScript::SetPrev(CRunningScript* v) { Previous = v; } + +SCRIPT_VAR* CRunningScript::GetVarPtr() { return LocalVar; } + +SCRIPT_VAR* CRunningScript::GetVarPtr(int i) { return &LocalVar[i]; } + +int* CRunningScript::GetIntVarPtr(int i) { return (int*)&LocalVar[i].dwParam; } + +int CRunningScript::GetIntVar(int i) const { return LocalVar[i].dwParam; } + +void CRunningScript::SetIntVar(int i, int v) { LocalVar[i].dwParam = v; } + +void CRunningScript::SetFloatVar(int i, float v) { LocalVar[i].fParam = v; } + +char CRunningScript::GetByteVar(int i) const { return LocalVar[i].bParam; } + +bool CRunningScript::GetConditionResult() const { return bCondResult != 0; } + +char CRunningScript::ReadDataType() { return ReadDataByte(); } + +short CRunningScript::ReadDataVarIndex() { return ReadDataWord(); } + +short CRunningScript::ReadDataArrayOffset() { return ReadDataWord(); } + +short CRunningScript::ReadDataArrayIndex() { return ReadDataWord(); } + +short 
CRunningScript::ReadDataArraySize() { return ReadDataByte(); } + +short CRunningScript::ReadDataArrayFlags() { return ReadDataByte(); } + +void CRunningScript::IncPtr(int n) { CurrentIP += n; } + +int CRunningScript::ReadDataByte() +{ + char b = *CurrentIP; + ++CurrentIP; + return b; +} + +short CRunningScript::ReadDataWord() +{ + short v = *(short*)CurrentIP; + CurrentIP += 2; + return v; +} + +int CRunningScript::ReadDataInt() +{ + int i = *(int*)CurrentIP; + CurrentIP += 4; + return i; +} + +void CRunningScript::PushStack(BYTE* ptr) +{ + Stack[SP++] = ptr; +} + +BYTE* CRunningScript::PopStack() +{ + return Stack[--SP]; +} + +#endif // __cplusplus + +} // CLEO namespace diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index 96f82583..ef8b5f78 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -1,46 +1,108 @@ /* - CLEO 4.4 header file; - Copyright (c) 2020 Alien, Deji, Junior_Djjr; + CLEO 4.5 header file + Copyright (c) 2023 Alien, Deji, Junior_Djjr, Miran */ #pragma once #include -#define CLEO_VERSION 0x04050000 +#define CLEO_VERSION_MAIN 4 +#define CLEO_VERSION_MAJOR 5 +#define CLEO_VERSION_MINOR 0 -//result of CLEO_GetGameVersion() -#define GV_US10 0 //1.0 us -#define GV_US11 1 //1.01 us - not supported -#define GV_EU10 2 //1.0 eu -#define GV_EU11 3 //1.01 eu -#define GV_UNK -1 //any other +#define CLEO_VERSION ((CLEO_VERSION_MAIN << 24)|(CLEO_VERSION_MAJOR << 16)|(CLEO_VERSION_MINOR << 8)) + +#define __TO_STR(x) #x +#define TO_STR(x) __TO_STR(x) +#define CLEO_VERSION_STR TO_STR(CLEO_VERSION_MAIN.CLEO_VERSION_MAJOR.CLEO_VERSION_MINOR) // "x.x.x" -typedef union +namespace CLEO { - DWORD dwParam; - int nParam; - float fParam; - void * pParam; - char * szParam; -} SCRIPT_VAR; + +//result of CLEO_GetGameVersion() +enum eGameVersion : int +{ + GV_US10 = 0, // 1.0 us + GV_US11 = 1, // 1.01 us - not supported + GV_EU10 = 2, // 1.0 eu + GV_EU11 = 3, // 1.01 eu + GV_STEAM, + GV_TOTAL, + GV_UNK = -1 // any other +}; //operand types -#define globalVar 2 //$ -#define 
localVar 3 //@ -#define globalArr 7 //$(,) -#define localArr 8 //@(,) -#define imm8 4 //char -#define imm16 5 //short -#define imm32 1 //long, unsigned long -#define imm32f 6 //float -#define vstring 0x0E //"" -#define sstring 9 //'' -#define globalVarVString 0x10 //v$ -#define localVarVString 0x11 //@v -#define globalVarSString 0x0A //s$ -#define localVarSString 0x0B //@s - -// CLEO virtual paths prefixes. Expandable with CLEO_ResolvePath +enum eDataType : int +{ + DT_END, + DT_DWORD, // imm32 + DT_VAR, // globalVar $ + DT_LVAR, // localVar @ + DT_BYTE, // imm8 + DT_WORD, // imm16 + DT_FLOAT, // imm32f + DT_VAR_ARRAY, // globalArr $(,) + DT_LVAR_ARRAY, // localArr @(,) + DT_TEXTLABEL, // sstring '' + DT_VAR_TEXTLABEL, // globalVarSString s$ + DT_LVAR_TEXTLABEL, // localVarSString @s + DT_VAR_TEXTLABEL_ARRAY, + DT_LVAR_TEXTLABEL_ARRAY, + DT_VARLEN_STRING, // vstring "" + DT_STRING, + DT_VAR_STRING, // globalVarVString v$ + DT_LVAR_STRING, // localVarVString @v + DT_VAR_STRING_ARRAY, + DT_LVAR_STRING_ARRAY +}; + +const size_t MAX_STR_LEN = 0xff; // max length of string type parameter + +union SCRIPT_VAR +{ + DWORD dwParam; + short wParam; + WORD usParam; + BYTE ucParam; + char cParam; + bool bParam; + int nParam; + float fParam; + void* pParam; + char* pcParam; +}; + +enum eLogicalOperation : WORD +{ + NONE = 0, // just replace + + AND_2 = 1, // AND operation on results of next two conditional opcodes + AND_3, + AND_4, + AND_5, + AND_6, + AND_7, + AND_END, + + OR_2 = 21, // OR operation on results of next two conditional opcodes + OR_3, + OR_4, + OR_5, + OR_6, + OR_7, + OR_END, +}; +static eLogicalOperation& operator--(eLogicalOperation& o) +{ + if (o == eLogicalOperation::NONE) return o; // can not be decremented anymore + if (o == eLogicalOperation::OR_2) return o = eLogicalOperation::NONE; + + auto val = static_cast(o); // to number + val--; + return o = static_cast(val); +} + +// CLEO virtual path prefixes. 
Expandable with CLEO_ResolvePath const char DIR_GAME[] = "0:"; // game root directory const char DIR_USER[] = "1:"; // game save directory const char DIR_SCRIPT[] = "2:"; // current script directory @@ -53,101 +115,160 @@ typedef SCRIPT_HANDLE HANDLE_CAR, CAR, HCAR, VEHICLE, HVEHICLE, HANDLE_VEHICLE; typedef SCRIPT_HANDLE HANDLE_OBJECT, OBJECT, HOBJECT; typedef SCRIPT_HANDLE HSTREAM; -typedef struct CScriptThread CScriptThread; - #pragma pack(push,1) -struct CScriptThread +#ifdef __cplusplus +class CRunningScript +{ +protected: +#else +struct CRunningScript { - CScriptThread *next; //next script in queue - CScriptThread *prev; //previous script in queue - char threadName[8]; //name of thread, given by 03A4 opcode - BYTE *baseIp; //pointer to begin of script in memory - BYTE *ip; //current index pointer - BYTE *stack[8]; //return stack for 0050, 0051 - WORD sp; //current item in stack - WORD _f3A; //padding - SCRIPT_VAR tls[34]; //thread's local variables - BYTE isActive; //is current thread active - char condResult; //condition result (true or false) - char missionCleanupFlag; //clean mission - char external; //is thread external (from script.img) - BYTE _fC8; //unknown - BYTE _fC9; //unknown - BYTE _fCA; //unknown - BYTE _fCB; //unknown - DWORD wakeTime; //time, when script starts again after 0001 opcode - WORD logicalOp; //00D6 parameter - BYTE notFlag; //opcode & 0x8000 != 0 - BYTE wbCheckEnabled; //wasted_or_busted check flag - BYTE wastedOrBusted; //is player wasted or busted - BYTE _fD5; //unknown - WORD _fD6; //unknown - DWORD sceneSkip; //scene skip label ptr - BYTE missionFlag; //is mission thread - BYTE _fDD[3]; //padding +#endif + CRunningScript* Next; // 0x00 next script in queue + CRunningScript* Previous; // 0x04 previous script in queue + char Name[8]; // 0x08 name of script, given by 03A4 opcode + void* BaseIP; // 0x10 pointer to begin of script in memory + BYTE* CurrentIP; // 0x14 current instruction pointer + BYTE* Stack[8]; // 0x18 return stack 
for 0050, 0051 + WORD SP; // 0x38 current item in stack + BYTE _pad3A[2]; // 0x3A padding + SCRIPT_VAR LocalVar[32]; // 0x3C script's local variables + DWORD Timers[2]; // 0xBC script's timers + bool bIsActive; // 0xC4 is script active + bool bCondResult; // 0xC5 condition result + bool bUseMissionCleanup; // 0xC6 clean mission + bool bIsExternal; // 0xC7 is thread external (from script.img) + bool bTextBlockOverride; // 0xC8 + BYTE bExternalType; // 0xC9 + BYTE _padCA[2]; // 0xCA padding + DWORD WakeTime; // 0xCC time, when script starts again after 0001 opcode + eLogicalOperation LogicalOp;// 0xD0 opcode 00D6 parameter + bool NotFlag; // 0xD2 opcode & 0x8000 != 0 + bool bWastedBustedCheck; // 0xD3 wasted_or_busted check flag + bool bWastedOrBusted; // 0xD4 is player wasted or busted + char _padD5[3]; // 0xD5 padding + void* SceneSkipIP; // 0xD8 scene skip label ptr + bool bIsMission; // 0xDC is this script mission + BYTE _padDD[2]; // 0xDD padding + bool bIsCustom; // 0xDF is this CLEO script + +#ifdef __cplusplus +public: + CRunningScript(); + + bool IsActive() const; + bool IsExternal() const; + bool IsMission() const; + bool IsCustom() const; // is this CLEO Script? 
+ const char* GetName() const; + BYTE* GetBasePointer() const; + BYTE* GetBytePointer() const; + void SetIp(void* ip); + void SetBaseIp(void* ip); + CRunningScript* GetNext() const; + CRunningScript* GetPrev() const; + void SetIsExternal(bool b); + void SetActive(bool b); + void SetNext(CRunningScript* v); + void SetPrev(CRunningScript* v); + SCRIPT_VAR* GetVarPtr(); + SCRIPT_VAR* GetVarPtr(int i); + int* GetIntVarPtr(int i); + int GetIntVar(int i) const; + void SetIntVar(int i, int v); + void SetFloatVar(int i, float v); + char GetByteVar(int i) const; + bool GetConditionResult() const; + + char ReadDataType(); + short ReadDataVarIndex(); + short ReadDataArrayOffset(); + short ReadDataArrayIndex(); + short ReadDataArraySize(); + short ReadDataArrayFlags(); + + void IncPtr(int n = 1); + int ReadDataByte(); + short ReadDataWord(); + int ReadDataInt(); + + void PushStack(BYTE* ptr); + BYTE* PopStack(); + #endif // __cplusplus }; #pragma pack(pop) +static_assert(sizeof(CRunningScript) == 0xE0, "Invalid size of CRunningScript!"); -#define OR_CONTINUE 0 -#define OR_INTERRUPT 1 +// alias for legacy use +#ifdef __cplusplus + typedef class CRunningScript CScriptThread; +#else + typedef struct CRunningScript CScriptThread; +#endif -typedef int OpcodeResult; +enum OpcodeResult : char +{ + OR_CONTINUE = 0, + OR_INTERRUPT = 1 +}; -typedef OpcodeResult (CALLBACK* _pOpcodeHandler)(CScriptThread*); -typedef void(*FuncScriptDeleteDelegateT) (CScriptThread*); +typedef OpcodeResult (CALLBACK* _pOpcodeHandler)(CRunningScript*); +typedef void(*FuncScriptDeleteDelegateT) (CRunningScript*); #ifdef __cplusplus extern "C" { #endif //__cplusplus -DWORD WINAPI CLEO_GetVersion(); -int WINAPI CLEO_GetGameVersion(); + DWORD WINAPI CLEO_GetVersion(); + eGameVersion WINAPI CLEO_GetGameVersion(); BOOL WINAPI CLEO_RegisterOpcode(WORD opcode, _pOpcodeHandler callback); -DWORD WINAPI CLEO_GetIntOpcodeParam(CScriptThread* thread); -float WINAPI CLEO_GetFloatOpcodeParam(CScriptThread* thread); +DWORD 
WINAPI CLEO_GetIntOpcodeParam(CRunningScript* thread); +float WINAPI CLEO_GetFloatOpcodeParam(CRunningScript* thread); -void WINAPI CLEO_SetIntOpcodeParam(CScriptThread* thread, DWORD value); -void WINAPI CLEO_SetFloatOpcodeParam(CScriptThread* thread, float value); +void WINAPI CLEO_SetIntOpcodeParam(CRunningScript* thread, DWORD value); +void WINAPI CLEO_SetFloatOpcodeParam(CRunningScript* thread, float value); -LPSTR WINAPI CLEO_ReadStringOpcodeParam(CScriptThread* thread, LPSTR buf, int size); -LPSTR WINAPI CLEO_ReadStringPointerOpcodeParam(CScriptThread* thread, LPSTR buf, int size); -void WINAPI CLEO_WriteStringOpcodeParam(CScriptThread* thread, LPCSTR str); +LPSTR WINAPI CLEO_ReadStringOpcodeParam(CRunningScript* thread, LPSTR buf, int size); +LPSTR WINAPI CLEO_ReadStringPointerOpcodeParam(CRunningScript* thread, LPSTR buf, int size); +void WINAPI CLEO_WriteStringOpcodeParam(CRunningScript* thread, LPCSTR str); -void WINAPI CLEO_SetThreadCondResult(CScriptThread* thread, BOOL result); +void WINAPI CLEO_SetThreadCondResult(CRunningScript* thread, BOOL result); -void WINAPI CLEO_SkipOpcodeParams(CScriptThread* thread, int count); +void WINAPI CLEO_SkipOpcodeParams(CRunningScript* thread, int count); -void WINAPI CLEO_ThreadJumpAtLabelPtr(CScriptThread* thread, int labelPtr); +void WINAPI CLEO_ThreadJumpAtLabelPtr(CRunningScript* thread, int labelPtr); -int WINAPI CLEO_GetOperandType(CScriptThread* thread); +eDataType WINAPI CLEO_GetOperandType(CRunningScript* thread); extern SCRIPT_VAR *opcodeParams; extern SCRIPT_VAR *missionLocals; //intermediate data is stored in opcodeParams array -void WINAPI CLEO_RetrieveOpcodeParams(CScriptThread *thread, int count); -void WINAPI CLEO_RecordOpcodeParams(CScriptThread *thread, int count); +void WINAPI CLEO_RetrieveOpcodeParams(CRunningScript *thread, int count); +void WINAPI CLEO_RecordOpcodeParams(CRunningScript *thread, int count); -SCRIPT_VAR * WINAPI CLEO_GetPointerToScriptVariable(CScriptThread *thread); +SCRIPT_VAR 
* WINAPI CLEO_GetPointerToScriptVariable(CRunningScript *thread); -DWORD WINAPI CLEO_GetScriptTextureById(CScriptThread* thread, int id); // ret RwTexture * +DWORD WINAPI CLEO_GetScriptTextureById(CRunningScript* thread, int id); // ret RwTexture * -HSTREAM WINAPI CLEO_GetInternalAudioStream(CScriptThread* thread, DWORD stream); // arg CAudioStream * +HSTREAM WINAPI CLEO_GetInternalAudioStream(CRunningScript* thread, DWORD stream); // arg CAudioStream * -CScriptThread* WINAPI CLEO_CreateCustomScript(CScriptThread* fromThread, const char *script_name, int label); +CRunningScript* WINAPI CLEO_CreateCustomScript(CRunningScript* fromThread, const char *script_name, int label); -CScriptThread* WINAPI CLEO_GetLastCreatedCustomScript(); +CRunningScript* WINAPI CLEO_GetLastCreatedCustomScript(); void WINAPI CLEO_AddScriptDeleteDelegate(FuncScriptDeleteDelegateT func); void WINAPI CLEO_RemoveScriptDeleteDelegate(FuncScriptDeleteDelegateT func); // convert to absolute file path -void WINAPI CLEO_ResolvePath(CScriptThread* thread, char* inOutPath, DWORD pathMaxLen); +void WINAPI CLEO_ResolvePath(CRunningScript* thread, char* inOutPath, DWORD pathMaxLen); #ifdef __cplusplus } #endif //__cplusplus + +} // CLEO namespace diff --git a/source/CCodeInjector.cpp b/source/CCodeInjector.cpp index e8029871..af9a78f8 100644 --- a/source/CCodeInjector.cpp +++ b/source/CCodeInjector.cpp @@ -1,5 +1,5 @@ #include "stdafx.h" -#include "cleo.h" +#include "CleoBase.h" #include "CDebug.h" #include "CCodeInjector.h" diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 3f251dbd..22a5ac3e 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -1,5 +1,5 @@ #include "stdafx.h" -#include "cleo.h" +#include "CleoBase.h" #include "CLegacy.h" #include "CGameVersionManager.h" #include "CCustomOpcodeSystem.h" @@ -883,8 +883,8 @@ namespace CLEO { savedLogicalOp = cs->LogicalOp; savedNotFlag = cs->NotFlag; - savedScriptFileDir = 
thread->GetScriptFileDir(); - savedScriptFileName = thread->GetScriptFileName(); + savedScriptFileDir = cs->GetScriptFileDir(); + savedScriptFileName = cs->GetScriptFileName(); // init new scope std::fill(std::begin(cs->Stack), std::end(cs->Stack), nullptr); @@ -928,8 +928,8 @@ namespace CLEO { cs->LogicalOp = savedLogicalOp; } - thread->SetScriptFileDir(savedScriptFileDir.c_str()); - thread->SetScriptFileName(savedScriptFileName.c_str()); + cs->SetScriptFileDir(savedScriptFileDir.c_str()); + cs->SetScriptFileName(savedScriptFileName.c_str()); cs->SetIp(retnAddress); cs->SetScmFunction(prevScmFunctionId); @@ -1044,7 +1044,7 @@ namespace CLEO { //0A92=-1,create_custom_thread %1d% OpcodeResult __stdcall opcode_0A92(CRunningScript *thread) { - auto filename = thread->ResolvePath(readString(thread), DIR_CLEO); // legacy: default search location is game\cleo directory + auto filename = reinterpret_cast(thread)->ResolvePath(readString(thread), DIR_CLEO); // legacy: default search location is game\cleo directory TRACE("[0A92] Starting new custom script %s from thread named %s", filename.c_str(), thread->GetName()); auto cs = new CCustomScript(filename.c_str()); @@ -1080,7 +1080,7 @@ namespace CLEO { //0A94=-1,create_custom_mission %1d% OpcodeResult __stdcall opcode_0A94(CRunningScript *thread) { - auto filename = thread->ResolvePath(readString(thread), DIR_CLEO); // legacy: default search location is game\cleo directory + auto filename = reinterpret_cast(thread)->ResolvePath(readString(thread), DIR_CLEO); // legacy: default search location is game\cleo directory filename += ".cm"; // add custom mission extension TRACE("[0A94] Starting new custom mission %s from thread named %s", filename.c_str(), thread->GetName()); @@ -1150,11 +1150,11 @@ namespace CLEO { std::string path = std::to_string(param); path += ":"; - thread->SetWorkDir(path.c_str()); + reinterpret_cast(thread)->SetWorkDir(path.c_str()); } else { - thread->SetWorkDir(readString(thread)); + 
reinterpret_cast(thread)->SetWorkDir(readString(thread)); } return OR_CONTINUE; } @@ -1162,7 +1162,7 @@ namespace CLEO { //0A9A=3,%3d% = openfile %1d% mode %2d% // IF and SET OpcodeResult __stdcall opcode_0A9A(CRunningScript *thread) { - auto filename = thread->ResolvePath(readString(thread)); + auto filename = reinterpret_cast(thread)->ResolvePath(readString(thread)); auto paramType = *thread->GetBytePointer(); char mode[0x10]; @@ -1285,7 +1285,7 @@ namespace CLEO { //0AA2=2,%2h% = load_library %1d% // IF and SET OpcodeResult __stdcall opcode_0AA2(CRunningScript *thread) { - auto filename = thread->ResolvePath(readString(thread)); + auto filename = reinterpret_cast(thread)->ResolvePath(readString(thread)); auto libHandle = LoadLibrary(filename.c_str()); *thread << libHandle; @@ -1588,7 +1588,7 @@ namespace CLEO { //0AAB=1, file_exists %1d% OpcodeResult __stdcall opcode_0AAB(CRunningScript *thread) { - auto filename = thread->ResolvePath(readString(thread)); + auto filename = reinterpret_cast(thread)->ResolvePath(readString(thread)); DWORD fAttr = GetFileAttributes(filename.c_str()); SetScriptCondResult(thread, (fAttr != INVALID_FILE_ATTRIBUTES) && !(fAttr & FILE_ATTRIBUTE_DIRECTORY)); @@ -1598,7 +1598,7 @@ namespace CLEO { //0AAC=2, %2d% = load_audiostream %1d% // IF and SET OpcodeResult __stdcall opcode_0AAC(CRunningScript *thread) { - auto filename = thread->ResolvePath(readString(thread)); + auto filename = reinterpret_cast(thread)->ResolvePath(readString(thread)); auto stream = GetInstance().SoundSystem.LoadStream(filename.c_str()); *thread << stream; @@ -1691,7 +1691,7 @@ namespace CLEO { default: { std::string err(128, '\0'); - sprintf(err.data(), "Invalid first argument type (%02X) of 0AB1 opcode in script '%s'", *thread->GetBytePointer(), thread->GetScriptFileName()); + sprintf(err.data(), "Invalid first argument type (%02X) of 0AB1 opcode in script '%s'", *thread->GetBytePointer(), reinterpret_cast(thread)->GetScriptFileName()); Error(err.data()); return 
OR_INTERRUPT; } @@ -1707,7 +1707,7 @@ namespace CLEO { if (pos == str.npos) { std::string err(128, '\0'); - sprintf(err.data(), "Invalid module reference '%s' in 0AB1 opcode in script '%s'", moduleTxt, thread->GetScriptFileName()); + sprintf(err.data(), "Invalid module reference '%s' in 0AB1 opcode in script '%s'", moduleTxt, reinterpret_cast(thread)->GetScriptFileName()); Error(err.data()); return OR_INTERRUPT; } @@ -1716,21 +1716,21 @@ namespace CLEO { // get module's file absolute path auto modulePath = std::string(strModule); - modulePath = thread->ResolvePath(modulePath.c_str(), DIR_SCRIPT); // by default search relative to current script location + modulePath = reinterpret_cast(thread)->ResolvePath(modulePath.c_str(), DIR_SCRIPT); // by default search relative to current script location // get export reference auto scriptRef = GetInstance().ModuleSystem.GetExport(modulePath, strExport); if (!scriptRef.Valid()) { std::string err(128, '\0'); - sprintf(err.data(), "Not found module '%s' export '%s', requested by 0AB1 opcode in script '%s'", modulePath.c_str(), &str[0], thread->GetScriptFileName()); + sprintf(err.data(), "Not found module '%s' export '%s', requested by 0AB1 opcode in script '%s'", modulePath.c_str(), &str[0], reinterpret_cast(thread)->GetScriptFileName()); Error(err.data()); return OR_INTERRUPT; } scmFunc->moduleExportRef = scriptRef.base; // to be released on return - thread->SetScriptFileDir(std::filesystem::path(modulePath).parent_path().string().c_str()); - thread->SetScriptFileName(std::filesystem::path(modulePath).filename().string().c_str()); + reinterpret_cast(thread)->SetScriptFileDir(std::filesystem::path(modulePath).parent_path().string().c_str()); + reinterpret_cast(thread)->SetScriptFileName(std::filesystem::path(modulePath).filename().string().c_str()); thread->SetBaseIp(scriptRef.base); label = scriptRef.offset; } @@ -1883,7 +1883,7 @@ namespace CLEO { { // steam offset is different, so get it manually for now CGameVersionManager& 
gvm = GetInstance().VersionManager; - DWORD hMarker = gvm.GetGameVersion() != GV_STEAM ? MenuManager->m_nTargetBlipIndex : *((DWORD*)0xC3312C); + DWORD hMarker = gvm.GetGameVersion() != GV_STEAM ? MenuManager->m_nTargetBlipIndex : *((DWORD*)0xC3312C); CMarker *pMarker; if (hMarker && (pMarker = &RadarBlips[LOWORD(hMarker)]) && /*pMarker->m_nPoolIndex == HIWORD(hMarker) && */pMarker->m_nBlipDisplay) { @@ -2775,31 +2775,31 @@ extern "C" #pragma warning(pop) #endif - DWORD WINAPI CLEO_GetIntOpcodeParam(CRunningScript* thread) + DWORD WINAPI CLEO_GetIntOpcodeParam(CLEO::CRunningScript* thread) { DWORD result; *thread >> result; return result; } - float WINAPI CLEO_GetFloatOpcodeParam(CRunningScript* thread) + float WINAPI CLEO_GetFloatOpcodeParam(CLEO::CRunningScript* thread) { float result; *thread >> result; return result; } - void WINAPI CLEO_SetIntOpcodeParam(CRunningScript* thread, DWORD value) + void WINAPI CLEO_SetIntOpcodeParam(CLEO::CRunningScript* thread, DWORD value) { *thread << value; } - void WINAPI CLEO_SetFloatOpcodeParam(CRunningScript* thread, float value) + void WINAPI CLEO_SetFloatOpcodeParam(CLEO::CRunningScript* thread, float value) { *thread << value; } - LPSTR WINAPI CLEO_ReadStringOpcodeParam(CRunningScript* thread, char *buf, int size) + LPSTR WINAPI CLEO_ReadStringOpcodeParam(CLEO::CRunningScript* thread, char *buf, int size) { static char internal_buf[MAX_STR_LEN]; if (!buf) { buf = internal_buf; size = MAX_STR_LEN; } @@ -2809,7 +2809,7 @@ extern "C" return buf; } - LPSTR WINAPI CLEO_ReadStringPointerOpcodeParam(CRunningScript* thread, char *buf, int size) + LPSTR WINAPI CLEO_ReadStringPointerOpcodeParam(CLEO::CRunningScript* thread, char *buf, int size) { static char internal_buf[MAX_STR_LEN]; if (!buf) { buf = internal_buf; size = MAX_STR_LEN; } @@ -2818,19 +2818,19 @@ extern "C" return readString(thread, buf, size); } - void WINAPI CLEO_WriteStringOpcodeParam(CRunningScript* thread, LPCSTR str) + void WINAPI 
CLEO_WriteStringOpcodeParam(CLEO::CRunningScript* thread, LPCSTR str) { auto dst = (char *)GetScriptParamPointer(thread); memcpy(dst, str, 16); dst[15] = '\0'; } - void WINAPI CLEO_SetThreadCondResult(CRunningScript* thread, BOOL result) + void WINAPI CLEO_SetThreadCondResult(CLEO::CRunningScript* thread, BOOL result) { SetScriptCondResult(thread, result != FALSE); } - void WINAPI CLEO_SkipOpcodeParams(CRunningScript* thread, int count) + void WINAPI CLEO_SkipOpcodeParams(CLEO::CRunningScript* thread, int count) { int len; for (int i = 0; i < count; i++) @@ -2874,32 +2874,32 @@ extern "C" } } - void WINAPI CLEO_ThreadJumpAtLabelPtr(CRunningScript* thread, int labelPtr) + void WINAPI CLEO_ThreadJumpAtLabelPtr(CLEO::CRunningScript* thread, int labelPtr) { ThreadJump(thread, labelPtr); } - int WINAPI CLEO_GetOperandType(CRunningScript* thread) + int WINAPI CLEO_GetOperandType(CLEO::CRunningScript* thread) { return *thread->GetBytePointer(); } - void WINAPI CLEO_RetrieveOpcodeParams(CRunningScript *thread, int count) + void WINAPI CLEO_RetrieveOpcodeParams(CLEO::CRunningScript *thread, int count) { GetScriptParams(thread, count); } - void WINAPI CLEO_RecordOpcodeParams(CRunningScript *thread, int count) + void WINAPI CLEO_RecordOpcodeParams(CLEO::CRunningScript *thread, int count) { SetScriptParams(thread, count); } - SCRIPT_VAR * WINAPI CLEO_GetPointerToScriptVariable(CRunningScript* thread) + SCRIPT_VAR * WINAPI CLEO_GetPointerToScriptVariable(CLEO::CRunningScript* thread) { return GetScriptParamPointer(thread); } - RwTexture * WINAPI CLEO_GetScriptTextureById(CRunningScript* thread, int id) + RwTexture * WINAPI CLEO_GetScriptTextureById(CLEO::CRunningScript* thread, int id) { CCustomScript* customScript = reinterpret_cast(thread); // We need to store-restore to update the texture list, not optimized, but this will not be used every frame anyway @@ -2909,14 +2909,14 @@ extern "C" return texture; } - HSTREAM WINAPI CLEO_GetInternalAudioStream(CRunningScript* thread, 
CAudioStream *stream) + CLEO::HSTREAM WINAPI CLEO_GetInternalAudioStream(CLEO::CRunningScript* thread, DWORD stream) // arg CAudioStream * { - return stream->GetInternal(); + return ((CAudioStream*)stream)->GetInternal(); } - CRunningScript* WINAPI CLEO_CreateCustomScript(CRunningScript* fromThread, const char *script_name, int label) + CLEO::CRunningScript* WINAPI CLEO_CreateCustomScript(CLEO::CRunningScript* fromThread, const char *script_name, int label) { - auto filename = fromThread->ResolvePath(script_name, DIR_CLEO); // legacy: default search location is game\cleo directory + auto filename = reinterpret_cast(fromThread)->ResolvePath(script_name, DIR_CLEO); // legacy: default search location is game\cleo directory if (label != 0) // create from label { @@ -2946,7 +2946,7 @@ extern "C" return cs; } - CRunningScript* WINAPI CLEO_GetLastCreatedCustomScript() + CLEO::CRunningScript* WINAPI CLEO_GetLastCreatedCustomScript() { return lastScriptCreated; } @@ -2961,14 +2961,14 @@ extern "C" scriptDeleteDelegate -= func; } - void WINAPI CLEO_ResolvePath(CRunningScript* thread, char* inOutPath, DWORD pathMaxLen) + void WINAPI CLEO_ResolvePath(CLEO::CRunningScript* thread, char* inOutPath, DWORD pathMaxLen) { if (thread == nullptr || inOutPath == nullptr || pathMaxLen < 1) { return; // invalid param } - auto resolved = thread->ResolvePath(inOutPath); + auto resolved = reinterpret_cast(thread)->ResolvePath(inOutPath); if (resolved.length() >= pathMaxLen) resolved.resize(pathMaxLen - 1); // and terminator character diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index c66f02d3..a8b23545 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -7,13 +7,6 @@ namespace CLEO { - const size_t MAX_STR_LEN = 0xff; // max length of string type parameter - enum OpcodeResult : char - { - OR_CONTINUE = 0, - OR_INTERRUPT = 1 - }; - typedef OpcodeResult(__stdcall * CustomOpcodeHandler)(CRunningScript*); void ResetScmFunctionStore(); bool 
is_legacy_handle(DWORD dwHandle); @@ -80,29 +73,4 @@ namespace CLEO }; extern void(__thiscall * ProcessScript)(CRunningScript*); - - // Exports - extern "C" - { - // Define external symbols with MSVC decorating schemes - BOOL WINAPI CLEO_RegisterOpcode(WORD opcode, CustomOpcodeHandler callback); - DWORD WINAPI CLEO_GetIntOpcodeParam(CRunningScript* thread); - float WINAPI CLEO_GetFloatOpcodeParam(CRunningScript* thread); - void WINAPI CLEO_SetIntOpcodeParam(CRunningScript* thread, DWORD value); - void WINAPI CLEO_SetFloatOpcodeParam(CRunningScript* thread, float value); - LPSTR WINAPI CLEO_ReadStringOpcodeParam(CRunningScript* thread, char* buf, int size); - LPSTR WINAPI CLEO_ReadStringPointerOpcodeParam(CRunningScript* thread, char* buf, int size); - void WINAPI CLEO_WriteStringOpcodeParam(CRunningScript* thread, LPCSTR str); - void WINAPI CLEO_SetThreadCondResult(CRunningScript* thread, BOOL result); - void WINAPI CLEO_SkipOpcodeParams(CRunningScript* thread, int count); - void WINAPI CLEO_ThreadJumpAtLabelPtr(CRunningScript* thread, int labelPtr); - int WINAPI CLEO_GetOperandType(CRunningScript* thread); - void WINAPI CLEO_RetrieveOpcodeParams(CRunningScript* thread, int count); - void WINAPI CLEO_RecordOpcodeParams(CRunningScript* thread, int count); - SCRIPT_VAR* WINAPI CLEO_GetPointerToScriptVariable(CRunningScript* thread); - RwTexture* WINAPI CLEO_GetScriptTextureById(CRunningScript* thread, int id); - HSTREAM WINAPI CLEO_GetInternalAudioStream(CRunningScript* thread, CAudioStream* stream); - CRunningScript* WINAPI CLEO_CreateCustomScript(CRunningScript* fromThread, const char* fileName, int label); - void WINAPI CLEO_ResolvePath(CRunningScript* thread, char* inOutPath, DWORD pathMaxLen); - } } diff --git a/source/CDebug.h b/source/CDebug.h index 4f64a3c6..f4ed0858 100644 --- a/source/CDebug.h +++ b/source/CDebug.h @@ -26,9 +26,9 @@ class CDebug Trace("Log started."); #ifdef _DEBUG - Trace("CLEO v%s DEBUG", CLEO_VERSION_DOT_STR); + Trace("CLEO v%s DEBUG", 
CLEO_VERSION_STR); #else - Trace("CLEO v%s", CLEO_VERSION_DOT_STR); + Trace("CLEO v%s", CLEO_VERSION_STR); #endif } diff --git a/source/CDmaFix.cpp b/source/CDmaFix.cpp index 6dfb31d2..0e378833 100644 --- a/source/CDmaFix.cpp +++ b/source/CDmaFix.cpp @@ -2,7 +2,7 @@ #include "stdafx.h" #include "CDmaFix.h" #include "CGameVersionManager.h" -#include "cleo.h" +#include "CleoBase.h" namespace CLEO { diff --git a/source/CGameMenu.cpp b/source/CGameMenu.cpp index d12f54b3..e03daa95 100644 --- a/source/CGameMenu.cpp +++ b/source/CGameMenu.cpp @@ -1,6 +1,6 @@ #include "stdafx.h" #include "CGameMenu.h" -#include "cleo.h" +#include "CleoBase.h" #include "CDebug.h" #include @@ -64,7 +64,7 @@ namespace CLEO auto cs_count = GetInstance().ScriptEngine.WorkingScriptsCount(); auto plugin_count = GetInstance().PluginSystem.GetNumPlugins(); std::ostringstream cleo_text; - cleo_text << "CLEO v" << CLEO_VERSION_DOT_STR; + cleo_text << "CLEO v" << CLEO_VERSION_STR; #ifdef _DEBUG cleo_text << " DEBUG"; #endif diff --git a/source/CGameVersionManager.cpp b/source/CGameVersionManager.cpp index 7fcd200f..055554fa 100644 --- a/source/CGameVersionManager.cpp +++ b/source/CGameVersionManager.cpp @@ -1,6 +1,5 @@ #include "stdafx.h" #include "CGameVersionManager.h" -#include "CleoVersion.h" namespace CLEO { @@ -138,18 +137,14 @@ namespace CLEO extern "C" { - eGameVersion __stdcall CLEO_GetGameVersion(); - - int __stdcall CLEO_GetVersion(); - - eGameVersion __stdcall CLEO_GetGameVersion() + DWORD __stdcall CLEO_GetVersion() { - return DetermineGameVersion(); + return CLEO_VERSION; } - int __stdcall CLEO_GetVersion() + eGameVersion __stdcall CLEO_GetGameVersion() { - return CLEO_VERSION; + return DetermineGameVersion(); } } } diff --git a/source/CGameVersionManager.h b/source/CGameVersionManager.h index eb47d56f..de6bbadd 100644 --- a/source/CGameVersionManager.h +++ b/source/CGameVersionManager.h @@ -4,17 +4,6 @@ namespace CLEO { - enum eGameVersion - { - GV_US10, - GV_US11, - GV_EU10, - 
GV_EU11, - GV_STEAM, - GV_TOTAL, - GV_UNK = -1 - }; - // returned by 0DD5: get_platform opcode enum ePlatform { diff --git a/source/CLEO4.rc b/source/CLEO4.rc index 0527cdbcb93278c443e7f5fb28f795f3776e8ae0..d1d30dda274a86071d41dc9781a1233d006f94cb 100644 GIT binary patch delta 107 zcmZqBI;S;Zi@Y9#9zzU+GlLINb04nrzKK0_FgEMh2T$ON+UHeTMz#p%r8&)^G$j+Draw(bBeforeFade); } + void CCustomScript::Process() { RestoreScriptSpecifics(); @@ -539,6 +540,107 @@ namespace CLEO } *useTextCommands = UseTextCommands; } + + const char* CCustomScript::GetScriptFileDir() const + { + if(!bIsCustom) + return GetInstance().ScriptEngine.MainScriptFileDir.c_str(); + + return scriptFileDir.c_str(); + } + + void CCustomScript::SetScriptFileDir(const char* directory) + { + if (!bIsCustom) + GetInstance().ScriptEngine.MainScriptFileDir = directory; + else + scriptFileDir = directory; + } + + const char* CCustomScript::GetScriptFileName() const + { + if (!bIsCustom) + return GetInstance().ScriptEngine.MainScriptFileName.c_str(); + + return scriptFileName.c_str(); + } + + void CCustomScript::SetScriptFileName(const char* filename) + { + if (!bIsCustom) + GetInstance().ScriptEngine.MainScriptFileName = filename; + else + scriptFileName = filename; + } + + const char* CCustomScript::GetWorkDir() const + { + if (!bIsCustom) + return GetInstance().ScriptEngine.MainScriptCurWorkDir.c_str(); + + return workDir.c_str(); + } + + void CCustomScript::SetWorkDir(const char* directory) + { + if (!bIsCustom) + GetInstance().ScriptEngine.MainScriptCurWorkDir = directory; + else + workDir = directory; + } + + std::string CCustomScript::ResolvePath(const char* path, const char* customWorkDir) const + { + if (path == nullptr) + { + return {}; + } + + std::string result; + if (strlen(path) < 2 || path[1] != ':') // does not start with drive letter + { + result = (customWorkDir != nullptr) ? 
customWorkDir : GetWorkDir(); + result.push_back('\\'); + result += path; + } + else + { + result = path; + } + + // predefined CLEO paths starting with '[digit]:' + if (result.length() < 2 || result[1] != ':' || + result[0] < DIR_GAME[0] || result[0] > DIR_MODULES[0]) // supported range + { + return result; // not predefined path prefix found + } + + if (result[0] == DIR_USER[0]) // saves/settings location + { + return std::string(GetUserDirectory()) + &result[2]; // original path without '1:' prefix; + } + + if (result[0] == DIR_SCRIPT[0]) // current script location + { + return std::string(GetScriptFileDir()) + &result[2]; // original path without '2:' prefix; + } + + // game root directory + std::string resolved = CFileMgr::ms_rootDirName; + + if (result[0] == DIR_CLEO[0]) // cleo directory + { + resolved += "\\cleo"; + } + else if (result[0] == DIR_MODULES[0]) // cleo modules directory + { + resolved += "\\cleo\\cleo_modules"; + } + + resolved += &result[2]; // original path without 'X:' prefix + return resolved; + } + void CCustomScript::StoreScriptTextures() { // store this scripts textures + restore SCM textures + make sure this scripts textures arent cleared by another @@ -1021,7 +1123,7 @@ namespace CLEO LastSearchPed(0), LastSearchCar(0), LastSearchObj(0), CompatVer(CLEO_VER_CUR) { - IsCustom(1); + bIsCustom = true; bIsMission = bUseMissionCleanup = bIsMiss; UseTextCommands = 0; NumDraws = 0; diff --git a/source/CScriptEngine.h b/source/CScriptEngine.h index 9dc31aac..36075229 100644 --- a/source/CScriptEngine.h +++ b/source/CScriptEngine.h @@ -49,8 +49,6 @@ namespace CLEO inline void SetScmFunction(WORD id) { MemWrite(reinterpret_cast(this) + 0xDD, id); } inline void SetNotFlag(bool b) { NotFlag = b; } inline char GetNotFlag() { return NotFlag; } - inline void IsCustom(bool b) { MemWrite(reinterpret_cast(this) + 0xDF, b); } - inline bool IsCustom() const { return MemRead(reinterpret_cast(this) + 0xDF); } inline bool IsOK() const { return bOK; } inline 
void enable_saving(bool en = true) { bSaveEnabled = en; } inline void SetCompatibility(CLEO_Version ver) { CompatVer = ver; } @@ -75,16 +73,19 @@ namespace CLEO void RestoreScriptCustoms(); // absolute path to directory where script's source file is located - const char* GetScriptFileDir() const { return scriptFileDir.c_str(); } - void SetScriptFileDir(const char* directory) { scriptFileDir = directory; } + const char* GetScriptFileDir() const; + void SetScriptFileDir(const char* directory); // filename with type extension of script's source file - const char* GetScriptFileName() const { return scriptFileName.c_str(); } - void SetScriptFileName(const char* filename) { scriptFileName = filename; } + const char* GetScriptFileName() const; + void SetScriptFileName(const char* filename); // current working directory of this script. Can be changed ith 0A99 - const char* GetWorkDir() const { return workDir.c_str(); } - void SetWorkDir(const char* directory) { workDir = directory; } + const char* GetWorkDir() const; + void SetWorkDir(const char* directory); + + // create absolute file path + std::string ResolvePath(const char* path, const char* customWorkDir = nullptr) const; }; class CScriptEngine : VInjectible @@ -95,7 +96,7 @@ namespace CLEO std::set InactiveScriptHashes; CCustomScript *CustomMission; - CCustomScript * LoadScript(const char *szFilePath); + CCustomScript *LoadScript(const char *szFilePath); public: std::string MainScriptFileDir; std::string MainScriptFileName; diff --git a/source/CSoundSystem.cpp b/source/CSoundSystem.cpp index 799286bf..0a19bfdb 100644 --- a/source/CSoundSystem.cpp +++ b/source/CSoundSystem.cpp @@ -2,7 +2,7 @@ #include "CSoundSystem.h" #include "bass.h" #include "CDebug.h" -#include "cleo.h" +#include "CleoBase.h" #include namespace CLEO diff --git a/source/CTextManager.cpp b/source/CTextManager.cpp index 3a4ba40d..ac8e79c2 100644 --- a/source/CTextManager.cpp +++ b/source/CTextManager.cpp @@ -1,7 +1,7 @@ #include "stdafx.h" #include 
"CTextManager.h" -#include "cleo.h" +#include "CleoBase.h" #include "FileEnumerator.h" #include #include diff --git a/source/CTheScripts.cpp b/source/CTheScripts.cpp deleted file mode 100644 index c2326a48..00000000 --- a/source/CTheScripts.cpp +++ /dev/null @@ -1,129 +0,0 @@ -#include "stdafx.h" -#include "CTheScripts.h" -#include "cleo.h" -#include "CFileMgr.h" - -using namespace CLEO; - - -bool CRunningScript::IsCustom() const -{ - auto cs = reinterpret_cast(this); - return cs->IsCustom(); -} - -const char* CRunningScript::GetScriptFileDir() const -{ - if (IsCustom()) - { - return reinterpret_cast(this)->GetScriptFileDir(); - } - - return GetInstance().ScriptEngine.MainScriptFileDir.c_str(); -} - -void CRunningScript::SetScriptFileDir(const char* directory) -{ - if (IsCustom()) - { - reinterpret_cast(this)->SetScriptFileDir(directory); - return; - } - - GetInstance().ScriptEngine.MainScriptFileDir = directory; -} - -const char* CRunningScript::GetScriptFileName() const -{ - if (IsCustom()) - { - return reinterpret_cast(this)->GetScriptFileName(); - } - - return GetInstance().ScriptEngine.MainScriptFileName.c_str(); -} - -void CRunningScript::SetScriptFileName(const char* filename) -{ - if (IsCustom()) - { - reinterpret_cast(this)->SetScriptFileName(filename); - return; - } - - GetInstance().ScriptEngine.MainScriptFileName = filename; -} - -const char* CRunningScript::GetWorkDir() const -{ - if (IsCustom()) - { - return reinterpret_cast(this)->GetWorkDir(); - } - - return GetInstance().ScriptEngine.MainScriptCurWorkDir.c_str(); -} - -void CRunningScript::SetWorkDir(const char* directory) -{ - if (IsCustom()) - { - reinterpret_cast(this)->SetWorkDir(directory); - return; - } - - GetInstance().ScriptEngine.MainScriptCurWorkDir = directory; -} - -std::string CRunningScript::ResolvePath(const char* path, const char* customWorkDir) const -{ - if (path == nullptr) - { - return {}; - } - - std::string result; - if (strlen(path) < 2 || path[1] != ':') // does not start 
with drive letter - { - result = (customWorkDir != nullptr) ? customWorkDir : GetWorkDir(); - result.push_back('\\'); - result += path; - } - else - { - result = path; - } - - // predefined CLEO paths starting with '[digit]:' - if (result.length() < 2 || result[1] != ':' || - result[0] < DIR_GAME[0] || result[0] > DIR_MODULES[0]) // supported range - { - return result; // not predefined path prefix found - } - - if (result[0] == DIR_USER[0]) // saves/settings location - { - return std::string(GetUserDirectory()) + &result[2]; // original path without '1:' prefix; - } - - if (result[0] == DIR_SCRIPT[0]) // current script location - { - return std::string(GetScriptFileDir()) + &result[2]; // original path without '2:' prefix; - } - - // game root directory - std::string resolved = CFileMgr::ms_rootDirName; - - if (result[0] == DIR_CLEO[0]) // cleo directory - { - resolved += "\\cleo"; - } - else if (result[0] == DIR_MODULES[0]) // cleo modules directory - { - resolved += "\\cleo\\cleo_modules"; - } - - resolved += &result[2]; // original path without 'X:' prefix - return resolved; -} - diff --git a/source/CTheScripts.h b/source/CTheScripts.h deleted file mode 100644 index 0ee03462..00000000 --- a/source/CTheScripts.h +++ /dev/null @@ -1,307 +0,0 @@ -#pragma once - -enum eDataType -{ - DT_END, - DT_DWORD, - DT_VAR, - DT_LVAR, - DT_BYTE, - DT_WORD, - DT_FLOAT, - DT_VAR_ARRAY, - DT_LVAR_ARRAY, - DT_TEXTLABEL, - DT_VAR_TEXTLABEL, - DT_LVAR_TEXTLABEL, - DT_VAR_TEXTLABEL_ARRAY, - DT_LVAR_TEXTLABEL_ARRAY, - DT_VARLEN_STRING, - DT_STRING, - DT_VAR_STRING, - DT_LVAR_STRING, - DT_VAR_STRING_ARRAY, - DT_LVAR_STRING_ARRAY -}; - -enum eLogicalOperation : WORD -{ - NONE = 0, // just replace - - AND_2 = 1, // AND operation on results of next two conditional opcodes - AND_3, - AND_4, - AND_5, - AND_6, - AND_7, - AND_END, - - OR_2 = 21, // OR operation on results of next two conditional opcodes - OR_3, - OR_4, - OR_5, - OR_6, - OR_7, - OR_END, -}; -static eLogicalOperation& 
operator--(eLogicalOperation& o) -{ - if (o == eLogicalOperation::NONE) return o; // can not be decremented anymore - if (o == eLogicalOperation::OR_2) return o = eLogicalOperation::NONE; - - auto val = static_cast(o); // to number - val--; - return o = static_cast(val); -} - -union SCRIPT_VAR -{ - DWORD dwParam; - short wParam; - WORD usParam; - BYTE ucParam; - char cParam; - bool bParam; - int nParam; - float fParam; - void* pParam; - char* pcParam; -}; - -class CRunningScript -{ -protected: - CRunningScript *Next; // +0x0 - CRunningScript *Previous; // +0x4 - char Name[8]; // +0x8 - void *BaseIP; // +0x10 - BYTE *CurrentIP; // +0x14 - BYTE *Stack[8]; // +0x18 - WORD SP; // +0x38 - SCRIPT_VAR LocalVar[32]; // +0x3C - DWORD Timers[2]; // +0xBC - bool bIsActive; // +0xC4 - bool bCondResult; // +0xC5 - bool bUseMissionCleanup; // +0xC6 - bool bIsExternal; // +0xC7 - bool bTextBlockOverride; // +0xC8 - BYTE bExternalType; // +0xC9 - DWORD WakeTime; // +0xCC - eLogicalOperation LogicalOp; // +0xD0 - bool NotFlag; // +0xD2 - bool bWastedBustedCheck; // +0xD3 - bool bWastedOrBusted; // +0xD4 - void *SceneSkipIP; // +0xD8 - bool bIsMission; // +0xDC - -public: - inline bool IsActive() { - return bIsActive; - } - inline bool IsExternal() { - return bIsExternal; - } - inline bool IsMission() { - return bIsMission; - } - inline const char * GetName() { - return Name; - } - inline BYTE * GetBasePointer() { - return (BYTE*)BaseIP; - } - inline BYTE * GetBytePointer() { - return CurrentIP; - } - inline void SetIp(void *ip) { - CurrentIP = (BYTE*)ip; - } - inline void SetBaseIp(void *ip) { - BaseIP = ip; - } - inline CRunningScript * GetNext() { - return Next; - } - inline CRunningScript * GetPrev() { - return Previous; - } - inline void SetIsExternal(bool b) { - bIsExternal = b; - } - inline void SetActive(bool b) { - bIsActive = b; - } - inline void SetNext(CRunningScript *v) { - Next = v; - } - inline void SetPrev(CRunningScript *v) { - Previous = v; - } - inline SCRIPT_VAR 
* GetVarPtr() { - return LocalVar; - } - inline SCRIPT_VAR * GetVarPtr(int i) { - return &LocalVar[i]; - } - inline int * GetIntVarPtr(int i) { - return (int*)&LocalVar[i].dwParam; - } - inline int GetIntVar(int i) { - return LocalVar[i].dwParam; - } - inline void SetIntVar(int i, int v) { - LocalVar[i].dwParam = v; - } - inline void SetFloatVar(int i, float v) { - LocalVar[i].fParam = v; - } - inline char GetByteVar(int i) { - return LocalVar[i].bParam; - } - inline bool GetConditionResult() { - return bCondResult != 0; - } - /* - inline int GetLocalVarVal(int i) { - return IsMission() ? CTheScripts::GetMissionLocal(i) : GetIntVar(i); - } - inline int * GetLocalVarPtr(int i) { - return IsMission() ? CTheScripts::GetMissionLocalPtr(i) : GetIntVarPtr(i); - }*/ - - inline char ReadDataType() { - return ReadDataByte(); - } - inline short ReadDataVarIndex() { - return ReadDataWord(); - } - inline short ReadDataArrayOffset() { - return ReadDataWord(); - } - inline short ReadDataArrayIndex() { - return ReadDataWord(); - } - inline short ReadDataArraySize() { - return ReadDataByte(); - } - inline short ReadDataArrayFlags() { - return ReadDataByte(); - } - /* - inline int ReadDataGlobalIntVal() - { - short i = ReadDataVarIndex(); - return CTheScripts::GetGlobalVarVal(i); - }; - - inline int ReadDataLocalIntVal() - { - short i = ReadDataVarIndex(); - return IsMission() ? 
CTheScripts::GetMissionLocal(i) : GetIntVar(i); - };*/ - /* - inline int ReadDataGlobalArrayIntVal() - { - short offset = ReadDataArrayOffset(), - index = ReadDataArrayIndex(), - size = ReadDataArraySize(), - flags = ReadDataArrayFlags(), - indexVal; - - if(flags >= 0x80) indexVal = GetLocalVarVal(index); - else indexVal = CTheScripts::GetGlobalVarVal(index); - - return CTheScripts::GetGlobalVarVal(indexVal + offset); - }; - - inline int ReadDataLocalArrayIntVal() - { - short offset = ReadDataArrayOffset(), - index = ReadDataArrayIndex(), - size = ReadDataArraySize(), - flags = ReadDataArrayFlags(), - indexVal; - - if(flags >= 0x80) indexVal = GetLocalVarVal(index); - else indexVal = CTheScripts::GetGlobalVarVal(index); - - return GetLocalVarVal(indexVal + offset); - };*/ - - inline void IncPtr(int n = 1) { - CurrentIP += n; - }; - - inline int ReadDataByte() - { - char b = *CurrentIP; - ++CurrentIP; - return b; - }; - - inline short ReadDataWord() - { - short v = *(short*)CurrentIP; - CurrentIP += 2; - return v; - }; - - inline int ReadDataInt() - { - int i = *(int*)CurrentIP; - CurrentIP += 4; - return i; - }; - - inline void PushStack(BYTE *ptr) - { - Stack[SP++] = ptr; - } - - inline BYTE * PopStack() - { - return Stack[--SP]; - } - - CRunningScript() - { - strcpy(Name, "noname"); - BaseIP = 0; - Previous = 0; - Next = 0; - CurrentIP = 0; - memset(Stack, 0, sizeof(Stack)); - SP = 0; - WakeTime = 0; - bIsActive = 0; - bCondResult = 0; - bUseMissionCleanup = 0; - bIsExternal = 0; - bTextBlockOverride = 0; - bExternalType = -1; - memset(LocalVar, 0, sizeof(LocalVar)); - LogicalOp = eLogicalOperation::NONE; - NotFlag = 0; - bWastedOrBusted = 0; - SceneSkipIP = 0; - bIsMission = 0; - bWastedBustedCheck = 1; - } - - bool IsCustom() const; // is it CLEO Script? 
- - // absolute path to directory where script's source file is located - const char* GetScriptFileDir() const; - void SetScriptFileDir(const char* directory); - - // filename with type extension of script's source file - const char* GetScriptFileName() const; - void SetScriptFileName(const char* filename); - - // current working directory of this script. Can be changed ith 0A99 - const char* GetWorkDir() const; - void SetWorkDir(const char* directory); - - // convert to absolute path - std::string ResolvePath(const char* path, const char* customWorkDir = nullptr) const; -}; diff --git a/source/cleo.cpp b/source/CleoBase.cpp similarity index 94% rename from source/cleo.cpp rename to source/CleoBase.cpp index 5e0350c7..4ec711e5 100644 --- a/source/cleo.cpp +++ b/source/CleoBase.cpp @@ -1,5 +1,5 @@ #include "stdafx.h" -#include "cleo.h" +#include "CleoBase.h" namespace CLEO @@ -16,3 +16,4 @@ namespace CLEO _asm jmp dwFunc } } + diff --git a/source/cleo.h b/source/CleoBase.h similarity index 82% rename from source/cleo.h rename to source/CleoBase.h index 4eaae708..6ed95a7a 100644 --- a/source/cleo.h +++ b/source/CleoBase.h @@ -1,12 +1,10 @@ -#ifndef __CLEO_H -#define __CLEO_H +#pragma once #include "CCodeInjector.h" #include "CGameVersionManager.h" #include "CDebug.h" #include "CDmaFix.h" #include "CGameMenu.h" -#include "CleoVersion.h" #include "CModuleSystem.h" #include "CPluginSystem.h" #include "CScriptEngine.h" @@ -18,13 +16,6 @@ namespace CLEO { - // CLEO virtual paths prefixes. 
Expandable with ResolvePath - const char DIR_GAME[] = "0:"; // game root directory - const char DIR_USER[] = "1:"; // game save directory - const char DIR_SCRIPT[] = "2:"; // current script directory - const char DIR_CLEO[] = "3:"; // game\cleo directory - const char DIR_MODULES[] = "4:"; // game\cleo\modules directory - class CCleoInstance { bool m_bStarted; @@ -81,4 +72,3 @@ namespace CLEO CCleoInstance& GetInstance(); } -#endif diff --git a/source/CleoVersion.h b/source/CleoVersion.h deleted file mode 100644 index 8e8cf00e..00000000 --- a/source/CleoVersion.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once - -#define CLEO_VERSION_MAIN 4 -#define CLEO_VERSION_MAJOR 5 -#define CLEO_VERSION_MINOR 0 - -#define CLEO_VERSION ((CLEO_VERSION_MAIN << 24)|(CLEO_VERSION_MAJOR << 16)|(CLEO_VERSION_MINOR << 8)) - -#define CLEO_VERSION_DOT CLEO_VERSION_MAIN.CLEO_VERSION_MAJOR.CLEO_VERSION_MINOR -#define CLEO_VERSION_COMMA CLEO_VERSION_MAIN,CLEO_VERSION_MAJOR,CLEO_VERSION_MINOR - -#define __TO_STR(x) #x -#define TO_STR(x) __TO_STR(x) - -#define CLEO_VERSION_DOT_STR TO_STR(CLEO_VERSION_DOT) -#define CLEO_VERSION_COMMA_STR TO_STR(CLEO_VERSION_COMMA) diff --git a/source/dllmain.cpp b/source/dllmain.cpp index 15b7f106..14a6f846 100644 --- a/source/dllmain.cpp +++ b/source/dllmain.cpp @@ -1,5 +1,5 @@ #include "stdafx.h" -#include "cleo.h" +#include "CleoBase.h" #include "CDebug.h" class Starter diff --git a/source/stdafx.h b/source/stdafx.h index be7fe2ce..a0be6691 100644 --- a/source/stdafx.h +++ b/source/stdafx.h @@ -27,7 +27,7 @@ #include #include -#include "CleoVersion.h" +#include "..\cleo_sdk\CLEO.h" #include "CTheScripts.h" enum CLEO_Version From 34d37bc9545da6065a39331bd06baf2e115e732d Mon Sep 17 00:00:00 2001 From: Miran Date: Sat, 7 Oct 2023 09:10:01 +0200 Subject: [PATCH 015/216] opcodes 0AAB, 0AE4, 0AE5, 0AE1, 0AE2 and 0AE3 moved from CLEO to File plugin added more detailed error messages in some cases updated general methods for getting and setting string parameters added 
some CLEO properties and methods to CRunningScript struct export addded: CLEO_RegisterCallback export addded: CLEO_GetScriptVersion --- CHANGELOG.md | 7 +- .../FileSystemOperations.cpp | 249 +++++---- .../FileSystemOperations.vcxproj | 2 + cleo_plugins/IniFiles/IniFiles.cpp | 11 +- cleo_plugins/IntOperations/IntOperations.cpp | 10 +- cleo_sdk/CLEO.cpp | 19 +- cleo_sdk/CLEO.h | 63 ++- cleo_sdk/CLEO.lib | Bin 8062 -> 8768 bytes source/CCustomOpcodeSystem.cpp | 510 +++++++++--------- source/CCustomOpcodeSystem.h | 40 +- source/CGameVersionManager.cpp | 4 +- source/CScriptEngine.cpp | 54 +- source/CScriptEngine.h | 10 +- source/CleoBase.cpp | 47 ++ source/CleoBase.h | 40 +- source/cleo.def | 3 + source/stdafx.h | 13 - 17 files changed, 620 insertions(+), 462 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1af2e8a5..cf1dc76c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,9 +3,14 @@ - added opcode 0DD5 (get_platform) - updated project settings - plugins moved to cleo\cleo_plugins directory -- rewriten Current Working Directory (editable with 0A99) handling. CWD changes are no longer affects internal game's processes and are not globally shared among all scripts. +- opcodes 0AAB, 0AE4, 0AE5, 0AE1, 0AE2 and 0AE3 moved from CLEO to File plugin. Adding "{$USE FILE}" might be required to compile some scripts. +- rewriten Current Working Directory (editable with 0A99) handling. CWD changes no longer affects internal game's processes and are not globally shared among all scripts. +- added more detailed error messages in some cases +- updated general methods for getting and setting string parameters - introduced 'virtual absolute paths'. 
Use prefix in file path strings to access predefined locations: "0:\" game root, "1:\" game save files directory, "2:\" this script file directory, "3:\" cleo folder, "4:\" cleo\cleo_modules - new CLEO SDK export addded: CLEO_ResolvePath +- new CLEO SDK export addded: CLEO_RegisterCallback +- new CLEO SDK export addded: CLEO_GetScriptVersion ## 4.4.4 diff --git a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp index e05de685..ed4a9a35 100644 --- a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp @@ -1,40 +1,156 @@ #include "plugin.h" #include "CLEO.h" +#include + using namespace CLEO; using namespace plugin; class FileSystemOperations { public: + static std::set m_hFileSearches; + + static void WINAPI OnFinalizeScriptObjects() + { + // clean up file searches + for (auto handle : m_hFileSearches) FindClose(handle); + m_hFileSearches.clear(); + } + FileSystemOperations() { - //check cleo version - if (CLEO_GetVersion() >= CLEO_VERSION) + auto cleoVer = CLEO_GetVersion(); + if (cleoVer >= CLEO_VERSION) { //register opcodes + CLEO_RegisterOpcode(0x0AAB, Script_FS_FileExists); + CLEO_RegisterOpcode(0x0AE4, Script_FS_DirectoryExists); + CLEO_RegisterOpcode(0x0AE5, Script_FS_CreateDirectory); + CLEO_RegisterOpcode(0x0AE6, Script_FS_FindFirstFile); + CLEO_RegisterOpcode(0x0AE7, Script_FS_FindNextFile); + CLEO_RegisterOpcode(0x0AE8, Script_FS_FindClose); + CLEO_RegisterOpcode(0x0B00, Script_FS_DeleteFile); CLEO_RegisterOpcode(0x0B01, Script_FS_DeleteDirectory); CLEO_RegisterOpcode(0x0B02, Script_FS_MoveFile); CLEO_RegisterOpcode(0x0B03, Script_FS_MoveDir); CLEO_RegisterOpcode(0x0B04, Script_FS_CopyFile); CLEO_RegisterOpcode(0x0B05, Script_FS_CopyDir); + + // register event callbacks + CLEO_RegisterCallback(eCallbackId::ScriptsFinalize, OnFinalizeScriptObjects); } else - MessageBox(HWND_DESKTOP, "An incorrect version of CLEO was 
loaded.", "FileSystemOperations.cleo", MB_ICONERROR); + { + std::string err(MAX_STR_LEN, '\0'); + sprintf(err.data(), "An incorrect version of CLEO (%X) was loaded. \nThis plugin requires version %X or later.", cleoVer, CLEO_VERSION); + MessageBox(HWND_DESKTOP, err.data(), "FileSystemOperations.cleo", MB_ICONERROR); + } } + static std::string ReadPathParam(CRunningScript* thread) + { + std::string path(MAX_STR_LEN, '\0'); + CLEO_ReadStringOpcodeParam(thread, path.data(), MAX_STR_LEN); + CLEO_ResolvePath(thread, path.data(), MAX_STR_LEN); + path.resize(strlen(path.c_str())); + return path; + } + + // 0AAB=1, file_exists %1s% + static OpcodeResult WINAPI Script_FS_FileExists(CRunningScript* thread) + { + auto filename = ReadPathParam(thread); + + DWORD fAttr = GetFileAttributes(filename.c_str()); + bool exists = (fAttr != INVALID_FILE_ATTRIBUTES) && !(fAttr & FILE_ATTRIBUTE_DIRECTORY); + + CLEO_SetThreadCondResult(thread, exists); + return OR_CONTINUE; + } + + // 0AE4=1, directory_exist %1s% + static OpcodeResult WINAPI Script_FS_DirectoryExists(CRunningScript* thread) + { + auto filename = ReadPathParam(thread); + + DWORD fAttr = GetFileAttributes(filename.c_str()); + bool exists = (fAttr != INVALID_FILE_ATTRIBUTES) && (fAttr & FILE_ATTRIBUTE_DIRECTORY); + + CLEO_SetThreadCondResult(thread, exists); + return OR_CONTINUE; + } + + // 0AE5=1, create_directory %1s% //IF and SET + static OpcodeResult WINAPI Script_FS_CreateDirectory(CRunningScript* thread) + { + auto filename = ReadPathParam(thread); + + bool result = CreateDirectory(filename.c_str(), NULL) != 0; + + CLEO_SetThreadCondResult(thread, result); + return OR_CONTINUE; + } + + // 0AE6=3, %2d% = find_first_file %1s% get_filename_to %3s% //IF and SET + static OpcodeResult WINAPI Script_FS_FindFirstFile(CRunningScript* thread) + { + auto filename = ReadPathParam(thread); + WIN32_FIND_DATA ffd = { 0 }; + HANDLE handle = FindFirstFile(filename.c_str(), &ffd); + + CLEO_SetIntOpcodeParam(thread, (DWORD)handle); + + 
if (handle != INVALID_HANDLE_VALUE) + { + m_hFileSearches.insert(handle); + + CLEO_WriteStringOpcodeParam(thread, ffd.cFileName); + CLEO_SetThreadCondResult(thread, true); + } + else + { + CLEO_SkipOpcodeParams(thread, 1); + CLEO_SetThreadCondResult(thread, false); + } + return OR_CONTINUE; + } + + // 0AE7=2,%2s% = find_next_file %1d% //IF and SET + static OpcodeResult WINAPI Script_FS_FindNextFile(CRunningScript* thread) + { + auto handle = (HANDLE)CLEO_GetIntOpcodeParam(thread); + + WIN32_FIND_DATA ffd = { 0 }; + if (FindNextFile(handle, &ffd)) + { + CLEO_WriteStringOpcodeParam(thread, ffd.cFileName); + CLEO_SetThreadCondResult(thread, true); + } + else + { + CLEO_SkipOpcodeParams(thread, 1); + CLEO_SetThreadCondResult(thread, false); + } + return OR_CONTINUE; + } + + // 0AE8=1, find_close %1d% + static OpcodeResult WINAPI Script_FS_FindClose(CRunningScript* thread) + { + auto handle = (HANDLE)CLEO_GetIntOpcodeParam(thread); + FindClose(handle); + m_hFileSearches.erase(handle); + return OR_CONTINUE; + } + + // 0B00=1, delete_file %1s% //IF and SET static OpcodeResult WINAPI Script_FS_DeleteFile(CScriptThread* thread) - /**************************************************************** - Opcode Format - 0B00=1,delete_file %1d% //IF and SET - ****************************************************************/ { - char FilePath[MAX_PATH]; - CLEO_ReadStringOpcodeParam(thread, FilePath, sizeof(FilePath)); - CLEO_ResolvePath(thread, FilePath, sizeof(FilePath)); + auto filename = ReadPathParam(thread); - CLEO_SetThreadCondResult(thread, DeleteFile(FilePath)); + CLEO_SetThreadCondResult(thread, DeleteFile(filename.c_str())); return OR_CONTINUE; } @@ -84,112 +200,71 @@ class FileSystemOperations return RemoveDirectory(path); } + // 0B01=1, delete_directory %1s% with_all_files_and_subdirectories %2d% //IF and SET static OpcodeResult WINAPI Script_FS_DeleteDirectory(CScriptThread* thread) - /**************************************************************** - Opcode Format - 
0B01=1,delete_directory %1d% with_all_files_and_subdirectories %2d% //IF and SET - ****************************************************************/ { - char DirPath[MAX_PATH]; - int DeleteAllInsideFlag; - BOOL result; - - CLEO_ReadStringOpcodeParam(thread, DirPath, sizeof(DirPath)); - CLEO_ResolvePath(thread, DirPath, sizeof(DirPath)); - - DeleteAllInsideFlag = CLEO_GetIntOpcodeParam(thread); + auto dirpath = ReadPathParam(thread); + int DeleteAllInsideFlag = CLEO_GetIntOpcodeParam(thread); + BOOL result; if (DeleteAllInsideFlag) { //remove directory with all files and subdirectories - result = DeleteDir(DirPath); + result = DeleteDir(dirpath.c_str()); } else { //try to remove as empty directory - result = RemoveDirectory(DirPath); + result = RemoveDirectory(dirpath.c_str()); } CLEO_SetThreadCondResult(thread, result); - return OR_CONTINUE; } + // 0B02=2, move_file %1s% to %2s% //IF and SET static OpcodeResult WINAPI Script_FS_MoveFile(CScriptThread* thread) - /**************************************************************** - Opcode Format - 0B02=2,move_file %1d% to %2d% //IF and SET - ****************************************************************/ { - char FilePath[MAX_PATH]; - char NewFilePath[MAX_PATH]; - BOOL result; - - CLEO_ReadStringOpcodeParam(thread, FilePath, sizeof(FilePath)); - CLEO_ResolvePath(thread, FilePath, sizeof(FilePath)); + auto filepath = ReadPathParam(thread); + auto newFilepath = ReadPathParam(thread); - CLEO_ReadStringOpcodeParam(thread, NewFilePath, sizeof(NewFilePath)); - CLEO_ResolvePath(thread, NewFilePath, sizeof(NewFilePath)); - - result = GetFileAttributes(FilePath) & FILE_ATTRIBUTE_DIRECTORY; + BOOL result = GetFileAttributes(filepath.c_str()) & FILE_ATTRIBUTE_DIRECTORY; if (!result) - result = MoveFile(FilePath, NewFilePath); + result = MoveFile(filepath.c_str(), newFilepath.c_str()); CLEO_SetThreadCondResult(thread, result); - return OR_CONTINUE; } + // 0B03=2, move_directory %1s% to %2s% //IF and SET static OpcodeResult 
WINAPI Script_FS_MoveDir(CScriptThread* thread) - /**************************************************************** - Opcode Format - 0B03=2,move_directory %1d% to %2d% //IF and SET - ****************************************************************/ { - char FilePath[MAX_PATH]; - char NewFilePath[MAX_PATH]; - BOOL result; - - CLEO_ReadStringOpcodeParam(thread, FilePath, sizeof(FilePath)); - CLEO_ResolvePath(thread, FilePath, sizeof(FilePath)); - - CLEO_ReadStringOpcodeParam(thread, NewFilePath, sizeof(NewFilePath)); - CLEO_ResolvePath(thread, NewFilePath, sizeof(NewFilePath)); + auto filepath = ReadPathParam(thread); + auto newFilepath = ReadPathParam(thread); - result = GetFileAttributes(FilePath) & FILE_ATTRIBUTE_DIRECTORY; + BOOL result = GetFileAttributes(filepath.c_str()) & FILE_ATTRIBUTE_DIRECTORY; if (result) - result = MoveFile(FilePath, NewFilePath); + result = MoveFile(filepath.c_str(), newFilepath.c_str()); CLEO_SetThreadCondResult(thread, result); - return OR_CONTINUE; } + // 0B04=2, copy_file %1s% to %2s% //IF and SET static OpcodeResult WINAPI Script_FS_CopyFile(CScriptThread* thread) - /**************************************************************** - Opcode Format - 0B04=2,copy_file %1d% to %2d% //IF and SET - ****************************************************************/ { - char FilePath[MAX_PATH]; - char NewFilePath[MAX_PATH]; - BOOL result; - DWORD fattr; - - CLEO_ReadStringOpcodeParam(thread, FilePath, sizeof(FilePath)); - CLEO_ResolvePath(thread, FilePath, sizeof(FilePath)); - - CLEO_ReadStringOpcodeParam(thread, NewFilePath, sizeof(NewFilePath)); - CLEO_ResolvePath(thread, NewFilePath, sizeof(NewFilePath)); + auto filepath = ReadPathParam(thread); + auto newFilepath = ReadPathParam(thread); - if (result = CopyFile(FilePath, NewFilePath, FALSE)) + BOOL result = CopyFile(filepath.c_str(), newFilepath.c_str(), FALSE); + if (result) { - //copy file attributes - fattr = GetFileAttributes(FilePath); - SetFileAttributes(NewFilePath, fattr); + // 
copy file attributes + DWORD fattr = GetFileAttributes(filepath.c_str()); + SetFileAttributes(newFilepath.c_str(), fattr); } - CLEO_SetThreadCondResult(thread, result); + CLEO_SetThreadCondResult(thread, result); return OR_CONTINUE; } @@ -245,23 +320,17 @@ class FileSystemOperations return TRUE; } + // 0B05=2, copy_directory %1d% to %2d% //IF and SET static OpcodeResult WINAPI Script_FS_CopyDir(CScriptThread* thread) - /**************************************************************** - Opcode Format - 0B05=2,copy_directory %1d% to %2d% //IF and SET - ****************************************************************/ { - char FilePath[MAX_PATH]; - char NewFilePath[MAX_PATH]; - - CLEO_ReadStringOpcodeParam(thread, FilePath, sizeof(FilePath)); - CLEO_ResolvePath(thread, FilePath, sizeof(FilePath)); - - CLEO_ReadStringOpcodeParam(thread, NewFilePath, sizeof(NewFilePath)); - CLEO_ResolvePath(thread, NewFilePath, sizeof(NewFilePath)); + auto filepath = ReadPathParam(thread); + auto newFilepath = ReadPathParam(thread); - CLEO_SetThreadCondResult(thread, CopyDir(FilePath, NewFilePath)); + BOOL result = CopyDir(filepath.c_str(), newFilepath.c_str()); + CLEO_SetThreadCondResult(thread, result); return OR_CONTINUE; } } fileSystemOperations; + +std::set FileSystemOperations::m_hFileSearches; diff --git a/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj b/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj index f9b721cf..7f1e6569 100644 --- a/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj @@ -64,6 +64,7 @@ _NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions) /Zc:threadSafeInit- %(AdditionalOptions) 
$(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared;$(PLUGIN_SDK_DIR)\shared\game;$(SolutionDir)..\cleo_sdk;%(AdditionalIncludeDirectories) + stdcpp17 true @@ -88,6 +89,7 @@ xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" _DEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions) /Zc:threadSafeInit- %(AdditionalOptions) $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared;$(PLUGIN_SDK_DIR)\shared\game;$(SolutionDir)..\cleo_sdk;%(AdditionalIncludeDirectories) + stdcpp17 true diff --git a/cleo_plugins/IniFiles/IniFiles.cpp b/cleo_plugins/IniFiles/IniFiles.cpp index 9b17459a..d6f1e357 100644 --- a/cleo_plugins/IniFiles/IniFiles.cpp +++ b/cleo_plugins/IniFiles/IniFiles.cpp @@ -1,5 +1,6 @@ #include #include "CLEO.h" +#include using namespace CLEO; @@ -8,8 +9,8 @@ class IniFiles public: IniFiles() { - //check cleo version - if (CLEO_GetVersion() >= CLEO_VERSION) + auto cleoVer = CLEO_GetVersion(); + if (cleoVer >= CLEO_VERSION) { // register opcodes CLEO_RegisterOpcode(0x0AF0, Script_InifileGetInt); @@ -20,7 +21,11 @@ class IniFiles CLEO_RegisterOpcode(0x0AF5, Script_InifileWriteString); } else - MessageBox(HWND_DESKTOP, "An incorrect version of CLEO was loaded.", "IniFiles.cleo", MB_ICONERROR); + { + std::string err(MAX_STR_LEN, '\0'); + sprintf(err.data(), "An incorrect version of CLEO (%X) was loaded. 
\nThis plugin requires version %X or later.", cleoVer, CLEO_VERSION); + MessageBox(HWND_DESKTOP, err.data(), "IniFiles.cleo", MB_ICONERROR); + } } static OpcodeResult WINAPI Script_InifileGetInt(CScriptThread* thread) diff --git a/cleo_plugins/IntOperations/IntOperations.cpp b/cleo_plugins/IntOperations/IntOperations.cpp index bea244a6..d3b4363a 100644 --- a/cleo_plugins/IntOperations/IntOperations.cpp +++ b/cleo_plugins/IntOperations/IntOperations.cpp @@ -9,8 +9,8 @@ class IntOperations public: IntOperations() { - //check cleo version - if (CLEO_GetVersion() >= CLEO_VERSION) + auto cleoVer = CLEO_GetVersion(); + if (cleoVer >= CLEO_VERSION) { //register opcodes CLEO_RegisterOpcode(0x0B10, Script_IntOp_AND); @@ -29,7 +29,11 @@ class IntOperations CLEO_RegisterOpcode(0x0B1D, Scr_IntOp_SHL); } else - MessageBox(HWND_DESKTOP, "An incorrect version of CLEO was loaded.", "IntOpearations.cleo", MB_ICONERROR); + { + std::string err(MAX_STR_LEN, '\0'); + sprintf(err.data(), "An incorrect version of CLEO (%X) was loaded. 
\nThis plugin requires version %X or later.", cleoVer, CLEO_VERSION); + MessageBox(HWND_DESKTOP, err.data(), "IniFiles.cleo", MB_ICONERROR); + } } static OpcodeResult WINAPI Script_IntOp_AND(CScriptThread* thread) diff --git a/cleo_sdk/CLEO.cpp b/cleo_sdk/CLEO.cpp index 03bef1ce..8338f05e 100644 --- a/cleo_sdk/CLEO.cpp +++ b/cleo_sdk/CLEO.cpp @@ -27,6 +27,7 @@ CRunningScript::CRunningScript() bWastedOrBusted = 0; SceneSkipIP = 0; bIsMission = 0; + ScmFunction = 0; bIsCustom = 0; } @@ -76,6 +77,10 @@ char CRunningScript::GetByteVar(int i) const { return LocalVar[i].bParam; } bool CRunningScript::GetConditionResult() const { return bCondResult != 0; } +bool CRunningScript::CRunningScript::GetNotFlag() const { return NotFlag; } + +void CRunningScript::CRunningScript::SetNotFlag(bool state) { NotFlag = state; } + char CRunningScript::ReadDataType() { return ReadDataByte(); } short CRunningScript::ReadDataVarIndex() { return ReadDataWord(); } @@ -111,15 +116,13 @@ int CRunningScript::ReadDataInt() return i; } -void CRunningScript::PushStack(BYTE* ptr) -{ - Stack[SP++] = ptr; -} +void CRunningScript::PushStack(BYTE* ptr) { Stack[SP++] = ptr; } -BYTE* CRunningScript::PopStack() -{ - return Stack[--SP]; -} +BYTE* CRunningScript::PopStack() { return Stack[--SP]; } + +WORD CRunningScript::GetScmFunction() const { return ScmFunction; } + +void CRunningScript::SetScmFunction(WORD id) { ScmFunction = id; } #endif // __cplusplus diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index ef8b5f78..52ba7a1d 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -10,16 +10,27 @@ #define CLEO_VERSION_MAJOR 5 #define CLEO_VERSION_MINOR 0 -#define CLEO_VERSION ((CLEO_VERSION_MAIN << 24)|(CLEO_VERSION_MAJOR << 16)|(CLEO_VERSION_MINOR << 8)) +#define CLEO_VERSION ((CLEO_VERSION_MAIN << 24)|(CLEO_VERSION_MAJOR << 16)|(CLEO_VERSION_MINOR << 8)) // 0x0v0v0v00 #define __TO_STR(x) #x #define TO_STR(x) __TO_STR(x) -#define CLEO_VERSION_STR 
TO_STR(CLEO_VERSION_MAIN.CLEO_VERSION_MAJOR.CLEO_VERSION_MINOR) // "x.x.x" +#define CLEO_VERSION_STR TO_STR(CLEO_VERSION_MAIN.CLEO_VERSION_MAJOR.CLEO_VERSION_MINOR) // "v.v.v" namespace CLEO { +// result of CLEO_GetScriptVersion +enum eCLEO_Version : DWORD +{ + CLEO_VER_3 = 0x03000000, + CLEO_VER_4_MIN = 0x04000000, + CLEO_VER_4_2 = 0x04020000, + CLEO_VER_4_3 = 0x04030000, + CLEO_VER_4_4 = 0x04040000, + CLEO_VER_4 = CLEO_VER_4_4, + CLEO_VER_CUR = CLEO_VERSION +}; -//result of CLEO_GetGameVersion() +// result of CLEO_GetGameVersion enum eGameVersion : int { GV_US10 = 0, // 1.0 us @@ -31,7 +42,7 @@ enum eGameVersion : int GV_UNK = -1 // any other }; -//operand types +// operand types enum eDataType : int { DT_END, @@ -109,6 +120,18 @@ const char DIR_SCRIPT[] = "2:"; // current script directory const char DIR_CLEO[] = "3:"; // game\cleo directory const char DIR_MODULES[] = "4:"; // game\cleo\modules directory +// argument of CLEO_RegisterCallback +enum class eCallbackId +{ + ScmInit1, // void WINAPI OnScmInit1(); + ScmInit2, // void WINAPI OnScmInit2(); + ScmInit3, // void WINAPI OnScmInit3(); + ScriptsLoaded, // void WINAPI OnScriptsLoaded(); + ScriptsFinalize, // void WINAPI OnScriptsFinalize(); + ScriptProcess, // bool WINAPI OnScriptProcess(CRunningScript* pScript, int); // return false to skip this script processing + ScriptDraw, // void WINAPI OnScriptDraw(bool beforeFade); +}; + typedef int SCRIPT_HANDLE; typedef SCRIPT_HANDLE HANDLE_ACTOR, ACTOR, HACTOR, PED, HPED, HANDLE_PED; typedef SCRIPT_HANDLE HANDLE_CAR, CAR, HCAR, VEHICLE, HVEHICLE, HANDLE_VEHICLE; @@ -149,7 +172,7 @@ struct CRunningScript char _padD5[3]; // 0xD5 padding void* SceneSkipIP; // 0xD8 scene skip label ptr bool bIsMission; // 0xDC is this script mission - BYTE _padDD[2]; // 0xDD padding + WORD ScmFunction; // 0xDD CLEO's previous scmFunction id bool bIsCustom; // 0xDF is this CLEO script #ifdef __cplusplus @@ -179,6 +202,8 @@ struct CRunningScript void SetFloatVar(int i, float v); char 
GetByteVar(int i) const; bool GetConditionResult() const; + bool GetNotFlag() const; + void SetNotFlag(bool state); char ReadDataType(); short ReadDataVarIndex(); @@ -194,6 +219,10 @@ struct CRunningScript void PushStack(BYTE* ptr); BYTE* PopStack(); + + WORD GetScmFunction() const; + void SetScmFunction(WORD id); + #endif // __cplusplus }; #pragma pack(pop) @@ -219,28 +248,30 @@ typedef void(*FuncScriptDeleteDelegateT) (CRunningScript*); extern "C" { #endif //__cplusplus - DWORD WINAPI CLEO_GetVersion(); - eGameVersion WINAPI CLEO_GetGameVersion(); +DWORD WINAPI CLEO_GetVersion(); +eCLEO_Version WINAPI CLEO_GetScriptVersion(const CRunningScript* thread); +eGameVersion WINAPI CLEO_GetGameVersion(); -BOOL WINAPI CLEO_RegisterOpcode(WORD opcode, _pOpcodeHandler callback); +BOOL WINAPI CLEO_RegisterOpcode(WORD opcode, _pOpcodeHandler callback); +void WINAPI CLEO_RegisterCallback(eCallbackId id, void* func); DWORD WINAPI CLEO_GetIntOpcodeParam(CRunningScript* thread); float WINAPI CLEO_GetFloatOpcodeParam(CRunningScript* thread); -void WINAPI CLEO_SetIntOpcodeParam(CRunningScript* thread, DWORD value); -void WINAPI CLEO_SetFloatOpcodeParam(CRunningScript* thread, float value); +void WINAPI CLEO_SetIntOpcodeParam(CRunningScript* thread, DWORD value); +void WINAPI CLEO_SetFloatOpcodeParam(CRunningScript* thread, float value); LPSTR WINAPI CLEO_ReadStringOpcodeParam(CRunningScript* thread, LPSTR buf, int size); -LPSTR WINAPI CLEO_ReadStringPointerOpcodeParam(CRunningScript* thread, LPSTR buf, int size); -void WINAPI CLEO_WriteStringOpcodeParam(CRunningScript* thread, LPCSTR str); +LPSTR WINAPI CLEO_ReadStringPointerOpcodeParam(CRunningScript* thread, LPSTR buf, int size); // exactly same as CLEO_ReadStringOpcodeParam +void WINAPI CLEO_WriteStringOpcodeParam(CRunningScript* thread, LPCSTR str); -void WINAPI CLEO_SetThreadCondResult(CRunningScript* thread, BOOL result); +void WINAPI CLEO_SetThreadCondResult(CRunningScript* thread, BOOL result); -void WINAPI 
CLEO_SkipOpcodeParams(CRunningScript* thread, int count); +void WINAPI CLEO_SkipOpcodeParams(CRunningScript* thread, int count); -void WINAPI CLEO_ThreadJumpAtLabelPtr(CRunningScript* thread, int labelPtr); +void WINAPI CLEO_ThreadJumpAtLabelPtr(CRunningScript* thread, int labelPtr); -eDataType WINAPI CLEO_GetOperandType(CRunningScript* thread); +eDataType WINAPI CLEO_GetOperandType(const CRunningScript* thread); // peep data type extern SCRIPT_VAR *opcodeParams; diff --git a/cleo_sdk/CLEO.lib b/cleo_sdk/CLEO.lib index 548cf4ff6a4b4683c48055e12b028c29cd7db24e..327ffa245123295c3320de2c524457c5f437b323 100644 GIT binary patch delta 1040 zcmZwGUr1AN6bJC{)wQm3=Dc%%+}+(j0wF@yvXK7R$TS~>!opxsYw674(y1eS5QX(p zL}Z@uL4k7KBZ1w|Z3}?&fNd0zrUCm3;ItZY95vfIcMQ;Y>I_pHnLMHM4g(rxRp)C3cFl8S z{)l00mM>VqV;JYif}$bAQM0CVFY$w!Ea_@m0FA3#=a>XEt__{T%lFK?rBk~22eFzt zo!M*VgqbdsN7N1>Y#xg@wKZRfwkDcQN*m9)n;Oa{2b&V<^qoX!Z#39YP}rU<$~WMd80Mj!+8bJcr#5N> zrnw_4cEIWca%`K$+&~?7c8X2Nz%X}WRRLOkz&wlifm!C^O~#oR1Tt)mEpe^HH7zwj zw+onb02>^=4pd`{H|pYz<~V8QYd$vAFFLCH5K#sfEC`?+3Vd`^tlj^u`c!NcDvLTs zpUQG#z7f=DX}1191B%N&N|!3MY!TJ>mSD-g^3T?pf92NFAlXVQsQ0i;GZjY)wo+eI zyb;QdzZ>{z7WhBYOX)N{smz!{SR5h2cB5o*M2*=b!So!zzfz{^K|WJUK1t*(#cUqB zDBU{r%JT5rDh*Wm>7(>Gze=V)dw!KVVgF$wr&4|N%j@#>5RZzR1^poHKNPg-Jfn`d z*9CGZ_Rzxj`em(x+iX-6p9YG$&ymMfO+B9VlVSZW4g|FKcxMM_6LJf0fAN&TQ{3C3 Rpr@P8v-C)@tJmdYroTVw0Wkmo delta 626 zcmYk&y=xP37zgn0U1LmgkmO!+m)A>&($J70g0`9JU@aD_4Ygotp$U!-H4Xu{K!q#? 
zDLkE|e?qCnTRJ$1-3oS4C{#ogq(dfMI#uiUa!K(AAHMf{d-&b+e&+A=R;_cRcB?kp zAI%tmDIgIC^a@}!0c$6aGJw7fCfU6}dLHOIU{Yp3kR1bjx#U2X>AMKzTR>muj2~23 z9yn%_J3Fw)q@o>gRsdh78gNcHr__1C8s~$_UJsbNfG@l4Yc%E_)E5?)8Z&nv-C3Ng zFD-Zvm3mmFBULtH@~RO9b|RhtDmu{QukToFM+(@W9$zw_1`cRH1B_*XWDe-k1-0_P z87;HOG0nQb9@#};#R00kbHY4lt#Q^}XT4n$VQjO|lw3v}ErKW<#Gs)drD*B@xz~u^ zQ>3ewC8=GPr#V*=+Jv|xF?SONnV)|?#hv#mAB^Fwa=H8kDug+ nQIwn3=I~TbzFEUlo|@`~ytVA9BAaPb-sMl_{;28MZaMT1GetBytePointer(); - if (!paramType) return nullptr; - - if (paramType >= DT_DWORD && paramType <= DT_LVAR_ARRAY) // process parameter as a pointer to string + auto paramType = CLEO_GetOperandType(thread); + switch(paramType) { - GetScriptParams(thread, 1); + // address of string buffer + case DT_DWORD: + case DT_VAR: + case DT_LVAR: + case DT_VAR_ARRAY: + case DT_LVAR_ARRAY: + GetScriptParams(thread, 1); + if (buf != nullptr) + { + strncpy(buf, opcodeParams[0].pcParam, size - 1); + buf[size - 1] = '\0'; + } + return opcodeParams[0].pcParam; // original string pointer - if (buf != nullptr) - { - strncpy(buf, opcodeParams[0].pcParam, size - 1); - buf[size - 1] = '\0'; - } + // short string variable + case DT_VAR_TEXTLABEL: + case DT_LVAR_TEXTLABEL: + case DT_VAR_TEXTLABEL_ARRAY: + case DT_LVAR_TEXTLABEL_ARRAY: - return opcodeParams[0].pcParam; // original string pointer - } - else // process as scm string - { - // no user output buffer provided - if (buf == nullptr) + // long string variable + case DT_VAR_STRING: + case DT_LVAR_STRING: + case DT_VAR_STRING_ARRAY: + case DT_LVAR_STRING_ARRAY: + + // in-code string + case DT_TEXTLABEL: // sstring '' + case DT_STRING: + case DT_VARLEN_STRING: { - static char result[MAX_STR_LEN]; - buf = result; - size = sizeof(result); - } + if (buf == nullptr) // provide buffer if not specified + { + static char result[MAX_STR_LEN]; + buf = result; + size = sizeof(result); + memset(buf, 0, size); + } - std::fill(buf, buf + size, '\0'); + if (paramType == DT_VARLEN_STRING) + { + // prococess here as 
GetScriptStringParam can not obtain strings with lenght greater than 128 + thread->IncPtr(1); // already processed paramType - if (paramType == DT_VARLEN_STRING) - { - // prococess here as GetScriptStringParam can not obtain strings with lenght greater than 128 - thread->IncPtr(1); // already read paramType + BYTE length = *thread->GetBytePointer(); // as unsigned! + thread->IncPtr(1); // length - BYTE length = *thread->GetBytePointer(); // as unsigned! - thread->IncPtr(1); // length + if (length > 0) + { + auto count = min(size, length); + memcpy(buf, thread->GetBytePointer(), count); - if (length > 0) + thread->IncPtr(length); // skip read data + } + } + else { - auto count = min(size, length); - memcpy(buf, thread->GetBytePointer(), count); - - thread->IncPtr(length); // read text + GetScriptStringParam(thread, buf, size); // standard game's function } + + return buf; } - else + break; + } + + // unsupported param type + GetScriptParams(thread, 1); // skip unhandled param + std::string err(MAX_STR_LEN, '\0'); + sprintf(err.data(), "Reading string from invalid argument type (%02X) in script '%s'", paramType, reinterpret_cast(thread)->GetScriptFileName()); + Error(err.data()); + return nullptr; + } + + // write output\result string parameter + bool WriteStringParam(CRunningScript* thread, const char* str) + { + size_t len = str == nullptr ? 
0 : strlen(str); + len = min(len, MAX_STR_LEN - 1); // and terminator char + + char* targetBuff; + auto paramType = CLEO_GetOperandType(thread); + switch(paramType) + { + // address to output buffer + case DT_DWORD: + case DT_VAR: + case DT_LVAR: + case DT_VAR_ARRAY: + case DT_LVAR_ARRAY: + GetScriptParams(thread, 1); + targetBuff = opcodeParams[0].pcParam; + break; + + // short string variable + case DT_VAR_TEXTLABEL: + case DT_LVAR_TEXTLABEL: + case DT_VAR_TEXTLABEL_ARRAY: + case DT_LVAR_TEXTLABEL_ARRAY: + targetBuff = (char*)GetScriptParamPointer(thread); + len = min(len, 7); // 8 with terminator + GetScriptParams(thread, 1); // param processed + break; + + // long string variable + case DT_VAR_STRING: + case DT_LVAR_STRING: + case DT_VAR_STRING_ARRAY: + case DT_LVAR_STRING_ARRAY: + targetBuff = (char*)GetScriptParamPointer(thread); + len = min(len, 15); // 16 with terminator + GetScriptParams(thread, 1); // param processed + break; + + default: { - GetScriptStringParam(thread, buf, size); + GetScriptParams(thread, 1); // skip unhandled param + std::string err(MAX_STR_LEN, '\0'); + sprintf(err.data(), "Outputing string into invalid argument type (%02X) in script '%s'", paramType, reinterpret_cast(thread)->GetScriptFileName()); + Error(err.data()); + return false; } - - return buf; } + + if(len > 0) std::memcpy(targetBuff, str, len); + targetBuff[len] = '\0'; + return true; // ok } // perform 'sprintf'-operation for parameters, passed through SCM - int format(CRunningScript *thread, char *str, size_t len, const char *format) + int ReadFormattedString(CRunningScript *thread, char *outputStr, size_t len, const char *format) { unsigned int written = 0; const char *iter = format; @@ -473,15 +586,16 @@ namespace CLEO { { if (written++ >= len) return -1; - *str++ = *iter++; + *outputStr++ = *iter++; } + if (*iter == '%') { if (iter[1] == '%') { if (written++ >= len) return -1; - *str++ = '%'; /* "%%"->'%' */ + *outputStr++ = '%'; /* "%%"->'%' */ iter += 2; continue; } 
@@ -499,7 +613,9 @@ namespace CLEO { if (*iter == '*') { char *buffiter = bufa; + //get width + if (CLEO_GetOperandType(thread) == DT_END) goto ReadFormattedString_ArgMissing; GetScriptParams(thread, 1); _itoa(opcodeParams[0].dwParam, buffiter, 10); while (*buffiter) @@ -521,6 +637,7 @@ namespace CLEO { if (*iter == '*') { char *buffiter = bufa; + if (CLEO_GetOperandType(thread) == DT_END) goto ReadFormattedString_ArgMissing; GetScriptParams(thread, 1); _itoa(opcodeParams[0].dwParam, buffiter, 10); while (*buffiter) @@ -539,13 +656,14 @@ namespace CLEO { case 's': { static const char none[] = "(null)"; - const char *astr = readString(thread); + if (CLEO_GetOperandType(thread) == DT_END) goto ReadFormattedString_ArgMissing; + const char *astr = ReadStringParam(thread); const char *striter = astr ? astr : none; while (*striter) { if (written++ >= len) return -1; - *str++ = *striter++; + *outputStr++ = *striter++; } iter++; break; @@ -554,8 +672,9 @@ namespace CLEO { case 'c': if (written++ >= len) return -1; + if (CLEO_GetOperandType(thread) == DT_END) goto ReadFormattedString_ArgMissing; GetScriptParams(thread, 1); - *str++ = (char)opcodeParams[0].nParam; + *outputStr++ = (char)opcodeParams[0].nParam; iter++; break; @@ -566,6 +685,7 @@ namespace CLEO { char *bufaiter = bufa; if (*iter == 'p' || *iter == 'P') { + if (CLEO_GetOperandType(thread) == DT_END) goto ReadFormattedString_ArgMissing; GetScriptParams(thread, 1); sprintf(bufaiter, "%08X", opcodeParams[0].dwParam); } @@ -578,11 +698,13 @@ namespace CLEO { *iter == 'f' || *iter == 'F' || *iter == 'g' || *iter == 'G') { + if (CLEO_GetOperandType(thread) == DT_END) goto ReadFormattedString_ArgMissing; GetScriptParams(thread, 1); sprintf(bufaiter, fmtbufa, opcodeParams[0].fParam); } else { + if (CLEO_GetOperandType(thread) == DT_END) goto ReadFormattedString_ArgMissing; GetScriptParams(thread, 1); sprintf(bufaiter, fmtbufa, opcodeParams[0].pParam); } @@ -591,7 +713,7 @@ namespace CLEO { { if (written++ >= len) 
return -1; - *str++ = *bufaiter++; + *outputStr++ = *bufaiter++; } iter++; break; @@ -599,16 +721,38 @@ namespace CLEO { } } } + if (written >= len) + { + std::string err(MAX_STR_LEN, '\0'); + sprintf(err.data(), "Error while formatting string in script '%s'. Output buffer is too short!", reinterpret_cast(thread)->GetScriptFileName()); + Error(err.data()); return -1; - *str++ = 0; + } + + // still more var-args available + if (CLEO_GetOperandType(thread) != DT_END) + { + std::string err(MAX_STR_LEN, '\0'); + sprintf(err.data(), "Error while formatting string in script '%s'. More arguments than slots in specified format!", reinterpret_cast(thread)->GetScriptFileName()); + Error(err.data()); + } + SkipUnusedParameters(thread); // skip terminator too + + *outputStr++ = '\0'; + return (int)written; + + ReadFormattedString_ArgMissing: + std::string err(MAX_STR_LEN, '\0'); + sprintf(err.data(), "Error while formatting string in script '%s'. Not enough arguments to match specified format!", reinterpret_cast(thread)->GetScriptFileName()); + Error(err.data()); return (int)written; } // Legacy modes for CLEO 3 - FILE * legacy_fopen(const char * szPath, const char * szMode) + FILE* legacy_fopen(const char* szPath, const char* szMode) { - FILE * hFile; + FILE* hFile; _asm { push szMode @@ -822,10 +966,12 @@ namespace CLEO { thread->SetIp(off < 0 ? 
thread->GetBasePointer() - off : scmBlock + off); } - inline void SkipUnusedParameters(CRunningScript *thread) + void SkipUnusedParameters(CRunningScript *thread) { - while (*thread->GetBytePointer()) GetScriptParams(thread, 1); // skip parameters - thread->ReadDataByte(); + while (CLEO_GetOperandType(thread) != DT_END) + GetScriptParams(thread, 1); // skip param + + thread->ReadDataByte(); // skip terminator } struct ScmFunction @@ -1044,7 +1190,7 @@ namespace CLEO { //0A92=-1,create_custom_thread %1d% OpcodeResult __stdcall opcode_0A92(CRunningScript *thread) { - auto filename = reinterpret_cast(thread)->ResolvePath(readString(thread), DIR_CLEO); // legacy: default search location is game\cleo directory + auto filename = reinterpret_cast(thread)->ResolvePath(ReadStringParam(thread), DIR_CLEO); // legacy: default search location is game\cleo directory TRACE("[0A92] Starting new custom script %s from thread named %s", filename.c_str(), thread->GetName()); auto cs = new CCustomScript(filename.c_str()); @@ -1080,7 +1226,7 @@ namespace CLEO { //0A94=-1,create_custom_mission %1d% OpcodeResult __stdcall opcode_0A94(CRunningScript *thread) { - auto filename = reinterpret_cast(thread)->ResolvePath(readString(thread), DIR_CLEO); // legacy: default search location is game\cleo directory + auto filename = reinterpret_cast(thread)->ResolvePath(ReadStringParam(thread), DIR_CLEO); // legacy: default search location is game\cleo directory filename += ".cm"; // add custom mission extension TRACE("[0A94] Starting new custom mission %s from thread named %s", filename.c_str(), thread->GetName()); @@ -1154,7 +1300,7 @@ namespace CLEO { } else { - reinterpret_cast(thread)->SetWorkDir(readString(thread)); + reinterpret_cast(thread)->SetWorkDir(ReadStringParam(thread)); } return OR_CONTINUE; } @@ -1162,7 +1308,7 @@ namespace CLEO { //0A9A=3,%3d% = openfile %1d% mode %2d% // IF and SET OpcodeResult __stdcall opcode_0A9A(CRunningScript *thread) { - auto filename = 
reinterpret_cast(thread)->ResolvePath(readString(thread)); + auto filename = reinterpret_cast(thread)->ResolvePath(ReadStringParam(thread)); auto paramType = *thread->GetBytePointer(); char mode[0x10]; @@ -1185,7 +1331,7 @@ namespace CLEO { else { // string param - GetScriptStringParam(thread, mode, sizeof(mode)); + ReadStringParam(thread, mode, sizeof(mode)); } if (auto hfile = open_file(filename.c_str(), mode, bLegacyMode)) @@ -1285,7 +1431,7 @@ namespace CLEO { //0AA2=2,%2h% = load_library %1d% // IF and SET OpcodeResult __stdcall opcode_0AA2(CRunningScript *thread) { - auto filename = reinterpret_cast(thread)->ResolvePath(readString(thread)); + auto filename = reinterpret_cast(thread)->ResolvePath(ReadStringParam(thread)); auto libHandle = LoadLibrary(filename.c_str()); *thread << libHandle; @@ -1308,7 +1454,7 @@ namespace CLEO { //0AA4=3,%3d% = get_proc_address %1d% library %2d% // IF and SET OpcodeResult __stdcall opcode_0AA4(CRunningScript *thread) { - char *funcName = readString(thread); + char *funcName = ReadStringParam(thread); HMODULE libHandle; *thread >> libHandle; void *funcAddr = (void *)GetProcAddress(libHandle, funcName); @@ -1353,7 +1499,7 @@ namespace CLEO { break; case DT_VARLEN_STRING: case DT_TEXTLABEL: - (*arg).pcParam = readString(thread, textParams[currTextParam++], MAX_STR_LEN); + (*arg).pcParam = ReadStringParam(thread, textParams[currTextParam++], MAX_STR_LEN); } } @@ -1413,7 +1559,7 @@ namespace CLEO { break; case DT_VARLEN_STRING: case DT_TEXTLABEL: - arg->pcParam = readString(thread, textParams[currTextParam++], MAX_STR_LEN); + arg->pcParam = ReadStringParam(thread, textParams[currTextParam++], MAX_STR_LEN); } } @@ -1472,7 +1618,7 @@ namespace CLEO { break; case DT_VARLEN_STRING: case DT_TEXTLABEL: - arg->pcParam = readString(thread, textParams[currTextParam++], MAX_STR_LEN); + arg->pcParam = ReadStringParam(thread, textParams[currTextParam++], MAX_STR_LEN); break; } } @@ -1537,7 +1683,7 @@ namespace CLEO { break; case 
DT_VARLEN_STRING: case DT_TEXTLABEL: - arg->pcParam = readString(thread, textParams[currTextParam++], MAX_STR_LEN); + arg->pcParam = ReadStringParam(thread, textParams[currTextParam++], MAX_STR_LEN); } } @@ -1576,7 +1722,7 @@ namespace CLEO { //0AAA=2, %2d% = thread %1d% pointer // IF and SET OpcodeResult __stdcall opcode_0AAA(CRunningScript *thread) { - char *threadName = readString(thread); + char *threadName = ReadStringParam(thread); threadName[7] = '\0'; CRunningScript *cs = GetInstance().ScriptEngine.FindCustomScriptNamed(threadName); if (!cs) cs = GetInstance().ScriptEngine.FindScriptNamed(threadName); @@ -1585,20 +1731,10 @@ namespace CLEO { return OR_CONTINUE; } - //0AAB=1, file_exists %1d% - OpcodeResult __stdcall opcode_0AAB(CRunningScript *thread) - { - auto filename = reinterpret_cast(thread)->ResolvePath(readString(thread)); - - DWORD fAttr = GetFileAttributes(filename.c_str()); - SetScriptCondResult(thread, (fAttr != INVALID_FILE_ATTRIBUTES) && !(fAttr & FILE_ATTRIBUTE_DIRECTORY)); - return OR_CONTINUE; - } - //0AAC=2, %2d% = load_audiostream %1d% // IF and SET OpcodeResult __stdcall opcode_0AAC(CRunningScript *thread) { - auto filename = reinterpret_cast(thread)->ResolvePath(readString(thread)); + auto filename = reinterpret_cast(thread)->ResolvePath(ReadStringParam(thread)); auto stream = GetInstance().SoundSystem.LoadStream(filename.c_str()); *thread << stream; @@ -1685,7 +1821,7 @@ namespace CLEO { case DT_STRING: case DT_TEXTLABEL: case DT_VARLEN_STRING: - moduleTxt = readString(thread); + moduleTxt = ReadStringParam(thread); break; default: @@ -1776,7 +1912,7 @@ namespace CLEO { case DT_STRING: case DT_TEXTLABEL: case DT_VARLEN_STRING: - scmFunc->stringParams.emplace_back(readString(thread)); // those texts exists in script code, but without terminator character. Copy is necessary + scmFunc->stringParams.emplace_back(ReadStringParam(thread)); // those texts exists in script code, but without terminator character. 
Copy is necessary arg->pcParam = (char*)scmFunc->stringParams.back().c_str(); break; } @@ -1931,7 +2067,7 @@ namespace CLEO { //0ABA=1,end_custom_thread_named %1d% OpcodeResult __stdcall opcode_0ABA(CRunningScript *thread) { - char *threadName = readString(thread); + char *threadName = ReadStringParam(thread); auto deleted_thread = GetInstance().ScriptEngine.FindCustomScriptNamed(threadName); if (deleted_thread) { @@ -2001,7 +2137,7 @@ namespace CLEO { //0AC1=2,%2d% = load_audiostream_with_3d_support %1d% //IF and SET OpcodeResult __stdcall opcode_0AC1(CRunningScript *thread) { - auto stream = GetInstance().SoundSystem.LoadStream(readString(thread), true); + auto stream = GetInstance().SoundSystem.LoadStream(ReadStringParam(thread), true); *thread << stream; SetScriptCondResult(thread, stream != nullptr); return OR_CONTINUE; @@ -2092,14 +2228,14 @@ namespace CLEO { //0ACA=1,show_text_box %1d% OpcodeResult __stdcall opcode_0ACA(CRunningScript *thread) { - PrintHelp(readString(thread)); + PrintHelp(ReadStringParam(thread)); return OR_CONTINUE; } //0ACB=3,show_styled_text %1d% time %2d% style %3d% OpcodeResult __stdcall opcode_0ACB(CRunningScript *thread) { - const char *text = readString(thread); + const char *text = ReadStringParam(thread); DWORD time, style; *thread >> time >> style; @@ -2110,7 +2246,7 @@ namespace CLEO { //0ACC=2,show_text_lowpriority %1d% time %2d% OpcodeResult __stdcall opcode_0ACC(CRunningScript *thread) { - const char *text = readString(thread); + const char *text = ReadStringParam(thread); DWORD time; *thread >> time; Print(text, time); @@ -2120,7 +2256,7 @@ namespace CLEO { //0ACD=2,show_text_highpriority %1d% time %2d% OpcodeResult __stdcall opcode_0ACD(CRunningScript *thread) { - const char *text = readString(thread); + const char *text = ReadStringParam(thread); DWORD time; *thread >> time; PrintNow(text, time); @@ -2132,10 +2268,9 @@ namespace CLEO { { char fmt[MAX_STR_LEN]; char text[MAX_STR_LEN]; - readString(thread, fmt, sizeof(fmt)); 
- format(thread, text, sizeof(text), fmt); + ReadStringParam(thread, fmt, sizeof(fmt)); + ReadFormattedString(thread, text, sizeof(text), fmt); PrintHelp(text); - SkipUnusedParameters(thread); return OR_CONTINUE; } @@ -2144,11 +2279,10 @@ namespace CLEO { { char fmt[MAX_STR_LEN]; char text[MAX_STR_LEN]; DWORD time, style; - readString(thread, fmt, sizeof(fmt)); + ReadStringParam(thread, fmt, sizeof(fmt)); *thread >> time >> style; - format(thread, text, sizeof(text), fmt); + ReadFormattedString(thread, text, sizeof(text), fmt); PrintBig(text, time, style); - SkipUnusedParameters(thread); return OR_CONTINUE; } @@ -2157,11 +2291,10 @@ namespace CLEO { { char fmt[MAX_STR_LEN]; char text[MAX_STR_LEN]; DWORD time; - readString(thread, fmt, sizeof(fmt)); + ReadStringParam(thread, fmt, sizeof(fmt)); *thread >> time; - format(thread, text, sizeof(text), fmt); + ReadFormattedString(thread, text, sizeof(text), fmt); Print(text, time); - SkipUnusedParameters(thread); return OR_CONTINUE; } @@ -2170,11 +2303,10 @@ namespace CLEO { { char fmt[MAX_STR_LEN]; char text[MAX_STR_LEN]; DWORD time; - readString(thread, fmt, sizeof(fmt)); + ReadStringParam(thread, fmt, sizeof(fmt)); *thread >> time; - format(thread, text, sizeof(text), fmt); + ReadFormattedString(thread, text, sizeof(text), fmt); PrintNow(text, time); - SkipUnusedParameters(thread); return OR_CONTINUE; } @@ -2207,9 +2339,8 @@ namespace CLEO { if (*thread->GetBytePointer() >= 1 && *thread->GetBytePointer() <= 8) *thread >> dst; else dst = &GetScriptParamPointer(thread)->cParam; - readString(thread, fmt, sizeof(fmt)); - format(thread, dst, -1, fmt); - SkipUnusedParameters(thread); + ReadStringParam(thread, fmt, sizeof(fmt)); + ReadFormattedString(thread, dst, MAX_STR_LEN, fmt); return OR_CONTINUE; } @@ -2217,8 +2348,8 @@ namespace CLEO { OpcodeResult __stdcall opcode_0AD4(CRunningScript *thread) { char fmt[MAX_STR_LEN], *format, *src; - src = readString(thread); - format = readString(thread, fmt, sizeof(fmt)); + src = 
ReadStringParam(thread); + format = ReadStringParam(thread, fmt, sizeof(fmt)); size_t cExParams = 0; int *result = (int *)GetScriptParamPointer(thread); @@ -2293,7 +2424,7 @@ namespace CLEO { *thread >> hFile; if (FILE * file = convert_handle_to_file(hFile)) { - SetScriptCondResult(thread, fputs(readString(thread), file) > 0); + SetScriptCondResult(thread, fputs(ReadStringParam(thread), file) > 0); fflush(file); } else { @@ -2308,14 +2439,13 @@ namespace CLEO { char fmt[MAX_STR_LEN]; char text[MAX_STR_LEN]; DWORD hFile; *thread >> hFile; - readString(thread, fmt, sizeof(fmt)); - format(thread, text, sizeof(text), fmt); + ReadStringParam(thread, fmt, sizeof(fmt)); + ReadFormattedString(thread, text, sizeof(text), fmt); if (FILE * file = convert_handle_to_file(hFile)) { fputs(text, file); fflush(file); } - SkipUnusedParameters(thread); return OR_CONTINUE; } @@ -2324,7 +2454,7 @@ namespace CLEO { { DWORD hFile; *thread >> hFile; - char *fmt = readString(thread); + char *fmt = ReadStringParam(thread); int *result = (int *)GetScriptParamPointer(thread); @@ -2373,7 +2503,7 @@ namespace CLEO { //0ADC=1, test_cheat %1d% OpcodeResult __stdcall opcode_0ADC(CRunningScript *thread) { - SetScriptCondResult(thread, TestCheat(readString(thread))); + SetScriptCondResult(thread, TestCheat(ReadStringParam(thread))); return OR_CONTINUE; } @@ -2398,7 +2528,7 @@ namespace CLEO { //0ADE=2,%2d% = text_by_GXT_entry %1d% OpcodeResult __stdcall opcode_0ADE(CRunningScript *thread) { - const char *gxt = readString(thread); + const char *gxt = ReadStringParam(thread); if (*thread->GetBytePointer() >= 1 && *thread->GetBytePointer() <= 8) *thread << GetInstance().TextManager.Get(gxt); else @@ -2410,9 +2540,9 @@ namespace CLEO { OpcodeResult __stdcall opcode_0ADF(CRunningScript *thread) { char gxtLabel[8]; // 7 + terminator character - readString(thread, gxtLabel, sizeof(gxtLabel)); + ReadStringParam(thread, gxtLabel, sizeof(gxtLabel)); - char *text = readString(thread); + char *text = 
ReadStringParam(thread); GetInstance().TextManager.AddFxt(gxtLabel, text); return OR_CONTINUE; @@ -2421,7 +2551,7 @@ namespace CLEO { //0AE0=1,remove_dynamic_GXT_entry %1d% OpcodeResult __stdcall opcode_0AE0(CRunningScript *thread) { - GetInstance().TextManager.RemoveFxt(readString(thread)); + GetInstance().TextManager.RemoveFxt(ReadStringParam(thread)); return OR_CONTINUE; } @@ -2548,121 +2678,6 @@ namespace CLEO { return OR_CONTINUE; } - //0AE4=1, directory_exist %1d% - OpcodeResult __stdcall opcode_0AE4(CRunningScript *thread) - { - auto fAttr = GetFileAttributes(readString(thread)); - SetScriptCondResult(thread, (fAttr != INVALID_FILE_ATTRIBUTES) && (fAttr & FILE_ATTRIBUTE_DIRECTORY)); - return OR_CONTINUE; - } - - //0AE5=1,create_directory %1d% //IF and SET - OpcodeResult __stdcall opcode_0AE5(CRunningScript *thread) - { - bool condResult = CreateDirectory(readString(thread), NULL) != 0; - SetScriptCondResult(thread, condResult); - return OR_CONTINUE; - } - - //0AE6=3,%2d% = find_first_file %1d% get_filename_to %3d% //IF and SET - OpcodeResult __stdcall opcode_0AE6(CRunningScript *thread) - { - WIN32_FIND_DATA ffd; - memset(&ffd, 0, sizeof(ffd)); - - HANDLE handle = FindFirstFile(readString(thread), &ffd); - *thread << handle; - GetInstance().OpcodeSystem.m_hFileSearches.insert(handle); - if (handle != INVALID_HANDLE_VALUE) - { - auto type = *thread->GetBytePointer(); - char* str; - switch (type) - { - case DT_VAR_STRING: - case DT_LVAR_STRING: - case DT_VAR_STRING_ARRAY: - case DT_LVAR_STRING_ARRAY: - str = (char*)GetScriptParamPointer(thread); - memcpy(str, ffd.cFileName, 16); - str[15] = '\0'; - break; - case DT_VAR_TEXTLABEL: - case DT_LVAR_TEXTLABEL: - case DT_VAR_TEXTLABEL_ARRAY: - case DT_LVAR_TEXTLABEL_ARRAY: - str = (char*)GetScriptParamPointer(thread); - memcpy(str, ffd.cFileName, 8); - str[7] = '\0'; - break; - default: - *thread >> str; - strcpy(str, ffd.cFileName); - } - SetScriptCondResult(thread, true); - } - else - { - readString(thread); - 
SetScriptCondResult(thread, false); - } - return OR_CONTINUE; - } - - //0AE7=2,%2d% = find_next_file %1d% //IF and SET - OpcodeResult __stdcall opcode_0AE7(CRunningScript *thread) - { - WIN32_FIND_DATA ffd; - memset(&ffd, 0, sizeof(ffd)); - - HANDLE handle; - *thread >> handle; - if (FindNextFile(handle, &ffd)) - { - auto type = *thread->GetBytePointer(); - char* str; - switch (type) - { - case DT_VAR_STRING: - case DT_LVAR_STRING: - case DT_VAR_STRING_ARRAY: - case DT_LVAR_STRING_ARRAY: - str = (char*)GetScriptParamPointer(thread); - memcpy(str, ffd.cFileName, 16); - str[15] = '\0'; - break; - case DT_VAR_TEXTLABEL: - case DT_LVAR_TEXTLABEL: - case DT_VAR_TEXTLABEL_ARRAY: - case DT_LVAR_TEXTLABEL_ARRAY: - str = (char*)GetScriptParamPointer(thread); - memcpy(str, ffd.cFileName, 8); - str[7] = '\0'; - break; - default: - *thread >> str; - strcpy(str, ffd.cFileName); - } - SetScriptCondResult(thread, true); - } - else - { - readString(thread); - SetScriptCondResult(thread, false); - } - return OR_CONTINUE; - } - - //0AE8=1,find_close %1d% - OpcodeResult __stdcall opcode_0AE8(CRunningScript *thread) - { - HANDLE handle; - *thread >> handle; - FindClose(handle); - GetInstance().OpcodeSystem.m_hFileSearches.erase(handle); - return OR_CONTINUE; - } - //0AE9=0,pop_float OpcodeResult __stdcall opcode_0AE9(CRunningScript *thread) { @@ -2707,7 +2722,7 @@ namespace CLEO { float val; char *format, *result; *thread >> val; - format = readString(thread); + format = ReadStringParam(thread); if (*thread->GetBytePointer() >= 1 && *thread->GetBytePointer() <= 8) *thread >> result; else @@ -2757,7 +2772,7 @@ extern "C" BOOL WINAPI CLEO_RegisterOpcode(WORD opcode, CustomOpcodeHandler callback) { - if ((opcode > 0x7FFF) || (opcode < 0x0AF0)) + if ((opcode > 0x7FFF) || (opcode < 0x0AAB)) return FALSE; CustomOpcodeHandler& dst = extraOpcodeHandlers[opcode % 100][opcode / 100 - 28]; @@ -2801,12 +2816,7 @@ extern "C" LPSTR WINAPI CLEO_ReadStringOpcodeParam(CLEO::CRunningScript* thread, 
char *buf, int size) { - static char internal_buf[MAX_STR_LEN]; - if (!buf) { buf = internal_buf; size = MAX_STR_LEN; } - if (!size) size = MAX_STR_LEN; - std::fill(buf, buf + size, '\0'); - GetScriptStringParam(thread, buf, size); - return buf; + return CLEO_ReadStringPointerOpcodeParam(thread, buf, size); // always support all string param types } LPSTR WINAPI CLEO_ReadStringPointerOpcodeParam(CLEO::CRunningScript* thread, char *buf, int size) @@ -2815,14 +2825,12 @@ extern "C" if (!buf) { buf = internal_buf; size = MAX_STR_LEN; } if (!size) size = MAX_STR_LEN; std::fill(buf, buf + size, '\0'); - return readString(thread, buf, size); + return ReadStringParam(thread, buf, size); } void WINAPI CLEO_WriteStringOpcodeParam(CLEO::CRunningScript* thread, LPCSTR str) { - auto dst = (char *)GetScriptParamPointer(thread); - memcpy(dst, str, 16); - dst[15] = '\0'; + WriteStringParam(thread, str); } void WINAPI CLEO_SetThreadCondResult(CLEO::CRunningScript* thread, BOOL result) @@ -2832,7 +2840,6 @@ extern "C" void WINAPI CLEO_SkipOpcodeParams(CLEO::CRunningScript* thread, int count) { - int len; for (int i = 0; i < count; i++) { switch (thread->ReadDataType()) @@ -2860,8 +2867,7 @@ extern "C" thread->IncPtr(4); break; case DT_VARLEN_STRING: - len = thread->ReadDataByte(); - thread->IncPtr(len); + thread->IncPtr((int)1 + *thread->GetBytePointer()); // as unsigned! 
length byte + string data break; case DT_TEXTLABEL: @@ -2879,9 +2885,9 @@ extern "C" ThreadJump(thread, labelPtr); } - int WINAPI CLEO_GetOperandType(CLEO::CRunningScript* thread) + eDataType WINAPI CLEO_GetOperandType(const CLEO::CRunningScript* thread) { - return *thread->GetBytePointer(); + return (eDataType )*thread->GetBytePointer(); } void WINAPI CLEO_RetrieveOpcodeParams(CLEO::CRunningScript *thread, int count) diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index a8b23545..b560cdc1 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -24,46 +24,13 @@ namespace CLEO friend OpcodeResult __stdcall opcode_0AA3(CRunningScript *pScript); friend OpcodeResult __stdcall opcode_0AC8(CRunningScript *pScript); friend OpcodeResult __stdcall opcode_0AC9(CRunningScript *pScript); - friend OpcodeResult __stdcall opcode_0AE6(CRunningScript *pScript); - friend OpcodeResult __stdcall opcode_0AE8(CRunningScript *pScript); public: std::set m_hFiles; std::set m_hNativeLibs; - std::set m_hFileSearches; std::set m_pAllocations; - void FinalizeScriptObjects() - { - // clean up after opcode_0A99 - _chdir(""); - TRACE("Cleaning up script data... 
%u files, %u libs, %u file scans, %u allocations...", - m_hFiles.size(), m_hNativeLibs.size(), m_hFileSearches.size(), m_pAllocations.size() - ); - - // clean up after opcode_0A9A - for (auto i = m_hFiles.begin(); i != m_hFiles.end(); ++i) - { - if (!is_legacy_handle(*i)) - fclose(convert_handle_to_file(*i)); - } - m_hFiles.clear(); - - // clean up after opcode_0AA2 - std::for_each(m_hNativeLibs.begin(), m_hNativeLibs.end(), FreeLibrary); - m_hNativeLibs.clear(); - - // clean up file searches - std::for_each(m_hFileSearches.begin(), m_hFileSearches.end(), FindClose); - m_hFileSearches.clear(); - - // clean up after opcode_0AB1 - ResetScmFunctionStore(); - - // clean up after opcode_0AC8 - std::for_each(m_pAllocations.begin(), m_pAllocations.end(), free); - m_pAllocations.clear(); - } + void FinalizeScriptObjects(); virtual void Inject(CCodeInjector& inj); ~CCustomOpcodeSystem() @@ -73,4 +40,9 @@ namespace CLEO }; extern void(__thiscall * ProcessScript)(CRunningScript*); + + char* ReadStringParam(CRunningScript* thread, char* buf, BYTE size); + bool WriteStringParam(CRunningScript* thread, const char* str); + int ReadFormattedString(CRunningScript* thread, char* outputStr, size_t len, const char* format); + void SkipUnusedParameters(CRunningScript* thread); // for var-args opcodes } diff --git a/source/CGameVersionManager.cpp b/source/CGameVersionManager.cpp index 055554fa..64ed0ae6 100644 --- a/source/CGameVersionManager.cpp +++ b/source/CGameVersionManager.cpp @@ -137,12 +137,12 @@ namespace CLEO extern "C" { - DWORD __stdcall CLEO_GetVersion() + DWORD WINAPI CLEO_GetVersion() { return CLEO_VERSION; } - eGameVersion __stdcall CLEO_GetGameVersion() + eGameVersion WINAPI CLEO_GetGameVersion() { return DetermineGameVersion(); } diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index c3fba889..997d3a2c 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -155,7 +155,16 @@ namespace CLEO void(__cdecl * DrawScriptStuff_H)(char 
bBeforeFade); DWORD* GameTimer; - extern "C" { + extern "C" + { + eCLEO_Version WINAPI CLEO_GetScriptVersion(const CRunningScript* thread) + { + if (thread->IsCustom()) + return reinterpret_cast(thread)->GetCompatibility(); + else + return CLEO::eCLEO_Version::CLEO_VER_CUR; + } + SCRIPT_VAR *opcodeParams; SCRIPT_VAR *missionLocals; CRunningScript *staticThreads; @@ -189,6 +198,12 @@ namespace CLEO GetInstance().ModuleSystem.Clear(); //GetInstance().ModuleSystem.LoadCleoModules(); // TODO: enbale if cleo_modules approved GetInstance().ScriptEngine.LoadCustomScripts(false); + + for (void* func : GetInstance().GetCallbacks(eCallbackId::ScmInit1)) + { + typedef void WINAPI callback(void); + ((callback*)func)(); + } } // called on first load before the others @@ -201,6 +216,12 @@ namespace CLEO GetInstance().OpcodeSystem.FinalizeScriptObjects(); GetInstance().SoundSystem.UnloadAllStreams(); GetInstance().ScriptEngine.LoadCustomScripts(); + + for (void* func : GetInstance().GetCallbacks(eCallbackId::ScmInit2)) + { + typedef void WINAPI callback(void); + ((callback*)func)(); + } } // called to load the scripts @@ -213,6 +234,12 @@ namespace CLEO GetInstance().OpcodeSystem.FinalizeScriptObjects(); GetInstance().SoundSystem.UnloadAllStreams(); GetInstance().ScriptEngine.LoadCustomScripts(true); + + for (void* func : GetInstance().GetCallbacks(eCallbackId::ScmInit3)) + { + typedef void WINAPI callback(void); + ((callback*)func)(); + } } extern "C" void __stdcall opcode_004E(CCustomScript *pScript) @@ -388,6 +415,17 @@ namespace CLEO void __fastcall HOOK_ProcessScript(CCustomScript * pScript, int) { + bool process = true; + for (void* func : GetInstance().GetCallbacks(eCallbackId::ScriptProcess)) + { + typedef bool WINAPI callback(CRunningScript*); + process = process && ((callback*)func)(pScript); + } + if (!process) + { + return; // skip this script + } + if (pScript->IsCustom()) pScript->Process(); else ProcessScript(pScript); } @@ -396,6 +434,12 @@ namespace CLEO { 
GetInstance().ScriptEngine.DrawScriptStuff(bBeforeFade); + for (void* func : GetInstance().GetCallbacks(eCallbackId::ScriptDraw)) + { + typedef void WINAPI callback(bool); + ((callback*)func)(bBeforeFade != 0); + } + // restore SCM textures and return to the overwritten func (which may != DrawScriptSprites) return bBeforeFade ? DrawScriptStuff_H(bBeforeFade) : DrawScriptStuff(bBeforeFade); } @@ -882,6 +926,12 @@ namespace CLEO auto cs = LoadScript(filename); if (cs) cs->SetCompatibility(CLEO_VER_3); }); + + for (void* func : GetInstance().GetCallbacks(eCallbackId::ScriptsLoaded)) + { + typedef void WINAPI callback(void); + ((callback*)func)(); + } } CCustomScript * CScriptEngine::LoadScript(const char * szFilePath) @@ -1137,7 +1187,7 @@ namespace CLEO scriptFileDir = path.parent_path().string(); scriptFileName = path.filename().string(); - workDir = "0:"; // game root + workDir = DIR_GAME; try { diff --git a/source/CScriptEngine.h b/source/CScriptEngine.h index 36075229..6218fec1 100644 --- a/source/CScriptEngine.h +++ b/source/CScriptEngine.h @@ -18,7 +18,7 @@ namespace CLEO bool bSaveEnabled; bool bOK; DWORD LastSearchPed, LastSearchCar, LastSearchObj; - CLEO_Version CompatVer; + eCLEO_Version CompatVer; BYTE UseTextCommands; int NumDraws; int NumTexts; @@ -45,14 +45,10 @@ namespace CLEO } inline SCRIPT_VAR * GetVarsPtr() { return LocalVar; } - inline WORD GetScmFunction() { return MemRead(reinterpret_cast(this) + 0xDD); } - inline void SetScmFunction(WORD id) { MemWrite(reinterpret_cast(this) + 0xDD, id); } - inline void SetNotFlag(bool b) { NotFlag = b; } - inline char GetNotFlag() { return NotFlag; } inline bool IsOK() const { return bOK; } inline void enable_saving(bool en = true) { bSaveEnabled = en; } - inline void SetCompatibility(CLEO_Version ver) { CompatVer = ver; } - inline CLEO_Version GetCompatibility() { return CompatVer; } + inline void SetCompatibility(eCLEO_Version ver) { CompatVer = ver; } + inline eCLEO_Version GetCompatibility() const { return 
CompatVer; } inline DWORD& GetLastSearchPed() { return LastSearchPed; } inline DWORD& GetLastSearchVehicle() { return LastSearchCar; } inline DWORD& GetLastSearchObject() { return LastSearchObj; } diff --git a/source/CleoBase.cpp b/source/CleoBase.cpp index 4ec711e5..50b5d779 100644 --- a/source/CleoBase.cpp +++ b/source/CleoBase.cpp @@ -7,6 +7,16 @@ namespace CLEO CCleoInstance CleoInstance; CCleoInstance& GetInstance() { return CleoInstance; } + inline CCleoInstance::CCleoInstance() + { + m_bStarted = false; + } + + inline CCleoInstance::~CCleoInstance() + { + Stop(); + } + void __declspec(naked) CCleoInstance::OnUpdateGameLogics() { //GetInstance().UpdateGameLogics(); // ! @@ -15,5 +25,42 @@ namespace CLEO dwFunc = (DWORD)(GetInstance().UpdateGameLogics); _asm jmp dwFunc } + + void CCleoInstance::Start() + { + CreateDirectory("cleo", NULL); + CreateDirectory("cleo/cleo_modules", NULL); + CreateDirectory("cleo/cleo_saves", NULL); + CreateDirectory("cleo/cleo_text", NULL); + CodeInjector.OpenReadWriteAccess(); // must do this earlier to ensure plugins write access on init + GameMenu.Inject(CodeInjector); + DmaFix.Inject(CodeInjector); + UpdateGameLogics = VersionManager.TranslateMemoryAddress(MA_UPDATE_GAME_LOGICS_FUNCTION); + CodeInjector.ReplaceFunction(&OnUpdateGameLogics, VersionManager.TranslateMemoryAddress(MA_CALL_UPDATE_GAME_LOGICS)); + TextManager.Inject(CodeInjector); + SoundSystem.Inject(CodeInjector); + OpcodeSystem.Inject(CodeInjector); + ScriptEngine.Inject(CodeInjector); + } + + void CCleoInstance::Stop() + { + if (!m_bStarted) return; + } + + void CCleoInstance::AddCallback(eCallbackId id, void* func) + { + m_callbacks[id].insert(func); + } + + const std::set& CCleoInstance::GetCallbacks(eCallbackId id) + { + return m_callbacks[id]; + } + + void WINAPI CLEO_RegisterCallback(eCallbackId id, void* func) + { + GetInstance().AddCallback(id, func); + } } diff --git a/source/CleoBase.h b/source/CleoBase.h index 6ed95a7a..c28ac0df 100644 --- 
a/source/CleoBase.h +++ b/source/CleoBase.h @@ -18,7 +18,8 @@ namespace CLEO { class CCleoInstance { - bool m_bStarted; + bool m_bStarted; + std::map> m_callbacks; public: CDmaFix DmaFix; @@ -33,40 +34,17 @@ namespace CLEO CPluginSystem PluginSystem; //CLegacy Legacy; - CCleoInstance() - { - m_bStarted = false; - } + CCleoInstance(); + virtual ~CCleoInstance(); - virtual ~CCleoInstance() - { - Stop(); - } + void Start(); + void Stop(); + + void AddCallback(eCallbackId id, void* func); + const std::set& GetCallbacks(eCallbackId id); void(__cdecl * UpdateGameLogics)(); static void __cdecl OnUpdateGameLogics(); - - void Start() - { - CreateDirectory("cleo", NULL); - CreateDirectory("cleo/cleo_modules", NULL); - CreateDirectory("cleo/cleo_saves", NULL); - CreateDirectory("cleo/cleo_text", NULL); - CodeInjector.OpenReadWriteAccess(); // must do this earlier to ensure plugins write access on init - GameMenu.Inject(CodeInjector); - DmaFix.Inject(CodeInjector); - UpdateGameLogics = VersionManager.TranslateMemoryAddress(MA_UPDATE_GAME_LOGICS_FUNCTION); - CodeInjector.ReplaceFunction(&OnUpdateGameLogics, VersionManager.TranslateMemoryAddress(MA_CALL_UPDATE_GAME_LOGICS)); - TextManager.Inject(CodeInjector); - SoundSystem.Inject(CodeInjector); - OpcodeSystem.Inject(CodeInjector); - ScriptEngine.Inject(CodeInjector); - } - - void Stop() - { - if (!m_bStarted) return; - } }; CCleoInstance& GetInstance(); diff --git a/source/cleo.def b/source/cleo.def index dc63e3b9..7fd4dfc4 100644 --- a/source/cleo.def +++ b/source/cleo.def @@ -28,3 +28,6 @@ EXPORTS _CLEO_AddScriptDeleteDelegate@4 @25 _CLEO_RemoveScriptDeleteDelegate@4 @26 _CLEO_ResolvePath@12 @27 + _CLEO_GetScriptVersion@4 @28 + _CLEO_RegisterCallback@8 @29 + diff --git a/source/stdafx.h b/source/stdafx.h index a0be6691..dd255cda 100644 --- a/source/stdafx.h +++ b/source/stdafx.h @@ -30,19 +30,6 @@ #include "..\cleo_sdk\CLEO.h" #include "CTheScripts.h" -enum CLEO_Version -{ - //CLEO_VER_1 = 0x01000000, - //CLEO_VER_2 = 
0x02000000, - CLEO_VER_3 = 0x03000000, - CLEO_VER_4_MIN = 0x04000000, - CLEO_VER_4_2 = 0x04020000, - CLEO_VER_4_3 = 0x04030000, - CLEO_VER_4_4 = 0x04040000, - CLEO_VER_4 = CLEO_VER_4_4, - CLEO_VER_CUR = CLEO_VERSION, -}; - #define CPOOL_USE_HANDLE_ACCESS //#define VALIDATE_SIZE(struc, size) static_assert(sizeof(struc) == size, #struc " (Invalid Structure Size)") From 49f09ebf36514641e7888eafecf7db04039fbac2 Mon Sep 17 00:00:00 2001 From: Seemann Date: Wed, 11 Oct 2023 23:44:22 -0400 Subject: [PATCH 016/216] sort source files using 4 new filters --- .gitignore | 2 +- CLEO4.vcxproj.filters | 156 +++++++++++++++++++++++------------------- cleo_sdk/CLEO.lib | Bin 8768 -> 8768 bytes source/CLEO4.rc | Bin 5454 -> 4092 bytes 4 files changed, 85 insertions(+), 73 deletions(-) diff --git a/.gitignore b/.gitignore index 5a024441..d49200bd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ build/ -output/ +.output/ *.APS *.VC.db *.VC.opendb diff --git a/CLEO4.vcxproj.filters b/CLEO4.vcxproj.filters index cbc426e8..5f49e219 100644 --- a/CLEO4.vcxproj.filters +++ b/CLEO4.vcxproj.filters @@ -10,56 +10,26 @@ {5cead5cc-9a75-4d2e-99b5-ebbc8f9d6d86} + + {3104a2cb-d9c5-4eb5-9910-cf77d903db30} + + + {1b4011a1-a8c2-4ab1-a38b-b0e28120d24d} + + + {01a17d56-e9bb-4315-a808-63e91baaafaa} + + + {9c8be703-c930-47b1-b0cb-7c4b80922a48} + - - source - - - source - - - source - - - source - - - source - - - source - - - source - - - source - - - source - - - source - - - source - - - source - source source - - source - - - source - plugin_sdk @@ -81,65 +51,107 @@ cleo_sdk + + source\utils + + + source\utils + + + source\utils + + + source\game_sa + + + source\game_sa + + + source\game_sa + + + source\core + + + source\extensions + + + source\extensions + + + source\extensions + + + source\core + + + source\extensions + + + source\game_sa + + + source\utils + - + source - - source + + cleo_sdk - - source + + source\utils + + + source\utils + + + source\utils - source + source\game_sa - 
source + source\game_sa - source + source\game_sa + + + source\core + + + source\extensions - source + source\extensions - - source + + source\extensions - source - - - source + source\extensions - source + source\core - source + source\extensions - source + source\game_sa - - source + + source\utils - source - - - source - - - cleo_sdk + source\utils source - - source - diff --git a/cleo_sdk/CLEO.lib b/cleo_sdk/CLEO.lib index 327ffa245123295c3320de2c524457c5f437b323..62d84b62575b0c0d542550b1e1b2458a4f6e30ff 100644 GIT binary patch delta 114 zcmX@$a=>N7J^{}Cf3|&NV`O09m@Fw`Hu=C>* z`GHU~Oh$cjpD+hdcJe7fsmYs!<5*+N7J^@ahU4=i`7#SEiCQFK#P5vuTz@o%(ux4_hU^vskn$4#Kg&0|;F#LTy z`GHU~Oh$cjpD+hdcJe7fsmYs!<5*X5bv(H=`Js^fWD}8Ym@4VX_e5Z-HZzIZ@d5yD Cn<;q! diff --git a/source/CLEO4.rc b/source/CLEO4.rc index d1d30dda274a86071d41dc9781a1233d006f94cb..0a0a245d261b0965227b6cd1deae33653f8abe09 100644 GIT binary patch delta 12 TcmX@7^+$fg8|KY+oB})mCp84g delta 361 zcmew(e@<(|8)oHDhHQo+hD3%;hCCon1d<928VtyysguQ7T3JIGycvQfFJw^`1hW_% z89W*MfY_12Z}LSJQAHO9R|YpAFOGn{;$PkQn?cBzRiVv;co1`JAIDOaEkE^tLgFhz_?6AMM%l^DvwjxGWk zUd*7!kO9=B#1I4YObpNy5Car|o&iCSICevgVTQ^R&^viOkM86v{2G%F@kkLBXzaWP KH+wK|;Q;_LnM08P From b5244a37766a08f9af6f5033ff2e5147312076f1 Mon Sep 17 00:00:00 2001 From: Miran Date: Fri, 13 Oct 2023 03:22:54 +0200 Subject: [PATCH 017/216] added debug_on and debug_off opcodes cleo.cpp removed from CLEO SDK new opcode 0DD5 (get_platform) new opcode 00CC (breakpoint) new opcode 00CD (trace) new opcode 00CE (log_to_file) support of opcodes 0662, 0663 and 0664 more error and warning messages updated opcodes handling --- .gitignore | 5 +- CHANGELOG.md | 23 +- CLEO4.vcxproj | 5 +- CLEO4.vcxproj.filters | 6 +- cleo_plugins/CLEO_Plugins.sln | 6 + cleo_plugins/DebugUtils/DebugUtils.cpp | 365 +++++++++++++++ cleo_plugins/DebugUtils/DebugUtils.ini | 10 + cleo_plugins/DebugUtils/DebugUtils.vcxproj | 129 ++++++ .../DebugUtils/DebugUtils.vcxproj.filters | 34 ++ cleo_plugins/DebugUtils/ScreenLog.cpp | 182 
++++++++ cleo_plugins/DebugUtils/ScreenLog.h | 73 +++ cleo_plugins/DebugUtils/Utils.h | 25 + .../FileSystemOperations.cpp | 6 +- cleo_plugins/IniFiles/IniFiles.cpp | 6 +- cleo_plugins/IntOperations/IntOperations.cpp | 6 +- cleo_sdk/CLEO.cpp | 129 ------ cleo_sdk/CLEO.h | 147 +++--- cleo_sdk/CLEO.lib | Bin 8768 -> 9208 bytes pack_release.bat | 4 + source/CCodeInjector.cpp | 4 +- source/CCustomOpcodeSystem.cpp | 432 ++++++++++-------- source/CCustomOpcodeSystem.h | 37 +- source/CDebug.cpp | 74 ++- source/CDebug.h | 54 +-- source/CDmaFix.cpp | 2 +- source/CGameMenu.cpp | 11 + source/CModuleSystem.cpp | 25 +- source/CPluginSystem.h | 6 +- source/CScriptEngine.cpp | 77 +++- source/CScriptEngine.h | 11 + source/CSoundSystem.cpp | 16 +- source/CTextManager.cpp | 7 +- source/CleoBase.cpp | 9 + source/CleoBase.h | 5 + source/cleo.def | 4 + source/cleo_config.ini | 3 + source/dllmain.cpp | 37 +- source/stdafx.h | 1 - 38 files changed, 1460 insertions(+), 516 deletions(-) create mode 100644 cleo_plugins/DebugUtils/DebugUtils.cpp create mode 100644 cleo_plugins/DebugUtils/DebugUtils.ini create mode 100644 cleo_plugins/DebugUtils/DebugUtils.vcxproj create mode 100644 cleo_plugins/DebugUtils/DebugUtils.vcxproj.filters create mode 100644 cleo_plugins/DebugUtils/ScreenLog.cpp create mode 100644 cleo_plugins/DebugUtils/ScreenLog.h create mode 100644 cleo_plugins/DebugUtils/Utils.h delete mode 100644 cleo_sdk/CLEO.cpp create mode 100644 source/cleo_config.ini diff --git a/.gitignore b/.gitignore index d49200bd..cee44a0e 100644 --- a/.gitignore +++ b/.gitignore @@ -38,4 +38,7 @@ build/ Debug/* Release/* ipch/ -.vs/ \ No newline at end of file +.vs/ +*/.output +*.zip +/.output/*/*.lib diff --git a/CHANGELOG.md b/CHANGELOG.md index cf1dc76c..e8b7527f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,16 +1,29 @@ ## 4.5.0 -- added opcode 0DD5 (get_platform) +- introduced DebugUtils plugin +- new opcode 00C3 (debug_on) +- new opcode 00C4 (debug_off) +- new opcode 00CC (breakpoint) +- new 
opcode 00CD (trace) +- new opcode 00CE (log_to_file) +- new opcode 0DD5 (get_game_platform) +- implemented support of opcodes 0662, 0663 and 0664 (original R* script debugging opcodes. See DebugUtils.ini) - updated project settings - plugins moved to cleo\cleo_plugins directory - opcodes 0AAB, 0AE4, 0AE5, 0AE1, 0AE2 and 0AE3 moved from CLEO to File plugin. Adding "{$USE FILE}" might be required to compile some scripts. -- rewriten Current Working Directory (editable with 0A99) handling. CWD changes no longer affects internal game's processes and are not globally shared among all scripts. +- rewritten Current Working Directory (editable with 0A99) handling. CWD changes no longer affects internal game's processes and are not globally shared among all scripts. - added more detailed error messages in some cases +- on some errors instead of crashing game the invalid script is paused - updated general methods for getting and setting string parameters - introduced 'virtual absolute paths'. Use prefix in file path strings to access predefined locations: "0:\" game root, "1:\" game save files directory, "2:\" this script file directory, "3:\" cleo folder, "4:\" cleo\cleo_modules -- new CLEO SDK export addded: CLEO_ResolvePath -- new CLEO SDK export addded: CLEO_RegisterCallback -- new CLEO SDK export addded: CLEO_GetScriptVersion +- updated opcodes handling. 
Now all opcodes in range 0-7FFF can be registered by CLEO plugins +- new CLEO SDK export: CLEO_ResolvePath +- new CLEO SDK export: CLEO_RegisterCallback +- new CLEO SDK export: CLEO_GetScriptVersion +- new CLEO SDK export: CLEO_GetScriptDebugMode +- new CLEO SDK export: CLEO_SetScriptDebugMode +- new CLEO SDK export: CLEO_Log +- new CLEO SDK export: CLEO_ReadParamsFormatted ## 4.4.4 diff --git a/CLEO4.vcxproj b/CLEO4.vcxproj index 7d39d45a..29111b41 100644 --- a/CLEO4.vcxproj +++ b/CLEO4.vcxproj @@ -11,10 +11,6 @@ - - NotUsing - NotUsing - NotUsing @@ -76,6 +72,7 @@ + diff --git a/CLEO4.vcxproj.filters b/CLEO4.vcxproj.filters index 5f49e219..488bddd2 100644 --- a/CLEO4.vcxproj.filters +++ b/CLEO4.vcxproj.filters @@ -48,9 +48,6 @@ plugin_sdk - - cleo_sdk - source\utils @@ -157,6 +154,9 @@ source + + source + diff --git a/cleo_plugins/CLEO_Plugins.sln b/cleo_plugins/CLEO_Plugins.sln index c59a49d9..1794ece0 100644 --- a/cleo_plugins/CLEO_Plugins.sln +++ b/cleo_plugins/CLEO_Plugins.sln @@ -9,6 +9,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "IniFiles", "IniFiles\IniFil EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "IntOperations", "IntOperations\IntOperations.vcxproj", "{68A434CF-6390-4FDF-9A15-36A8A9ECEAA9}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DebugUtils", "DebugUtils\DebugUtils.vcxproj", "{481896C4-0C19-4992-9602-729537774B32}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|x86 = Debug|x86 @@ -27,6 +29,10 @@ Global {68A434CF-6390-4FDF-9A15-36A8A9ECEAA9}.Debug|x86.Build.0 = Debug|Win32 {68A434CF-6390-4FDF-9A15-36A8A9ECEAA9}.Release|x86.ActiveCfg = Release|Win32 {68A434CF-6390-4FDF-9A15-36A8A9ECEAA9}.Release|x86.Build.0 = Release|Win32 + {481896C4-0C19-4992-9602-729537774B32}.Debug|x86.ActiveCfg = Debug|Win32 + {481896C4-0C19-4992-9602-729537774B32}.Debug|x86.Build.0 = Debug|Win32 + {481896C4-0C19-4992-9602-729537774B32}.Release|x86.ActiveCfg = Release|Win32 + 
{481896C4-0C19-4992-9602-729537774B32}.Release|x86.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/cleo_plugins/DebugUtils/DebugUtils.cpp b/cleo_plugins/DebugUtils/DebugUtils.cpp new file mode 100644 index 00000000..ec7586e9 --- /dev/null +++ b/cleo_plugins/DebugUtils/DebugUtils.cpp @@ -0,0 +1,365 @@ +#include "ScreenLog.h" +#include "Utils.h" +#include "CLEO.h" +#include "CTimer.h" +#include // keyboard +#include +#include +#include +#include + +using namespace CLEO; + +class DebugUtils +{ +public: + static ScreenLog screenLog; + + struct PausedScriptInfo + { + CScriptThread* ptr; + std::string msg; + PausedScriptInfo(CScriptThread* ptr, const char* msg) : ptr(ptr), msg(msg) {} + }; + static std::deque pausedScripts; + + // breakpoint continue keys + static const int KeyFirst = VK_F5; + static const size_t KeyCount = 8; // F5 to F12 + static bool keysReleased; // none of continue keys was pressed during previous frame + + static std::map logFiles; + + DebugUtils() + { + auto cleoVer = CLEO_GetVersion(); + if (cleoVer >= CLEO_VERSION) + { + auto config = GetConfigFilename(); + + // register opcodes + CLEO_RegisterOpcode(0x00C3, Opcode_DebugOn); + CLEO_RegisterOpcode(0x00C4, Opcode_DebugOff); + CLEO_RegisterOpcode(0x00CC, Opcode_Breakpoint); + CLEO_RegisterOpcode(0x00CD, Opcode_Trace); + CLEO_RegisterOpcode(0x00CE, Opcode_LogToFile); + + // original Rockstar's script debugging opcodes + if(GetPrivateProfileInt("General", "LegacyDebugOpcodes", 0, config.c_str()) != 0) + { + CLEO_RegisterOpcode(0x0662, Opcode_PrintString); + CLEO_RegisterOpcode(0x0663, Opcode_PrintInt); + CLEO_RegisterOpcode(0x0664, Opcode_PrintFloat); + } + + // register event callbacks + CLEO_RegisterCallback(eCallbackId::ScriptsFinalize, OnScriptsFinalize); + CLEO_RegisterCallback(eCallbackId::ScriptDraw, OnScriptDraw); + CLEO_RegisterCallback(eCallbackId::MenuDraw, OnMenuDraw); + 
CLEO_RegisterCallback(eCallbackId::ScriptProcess, OnScriptProcess); + CLEO_RegisterCallback(eCallbackId::Log, OnLog); + } + else + { + std::string err(128, '\0'); + sprintf(err.data(), "This plugin requires version %X or later! \nCurrent version of CLEO is %X.", CLEO_VERSION >> 8, cleoVer >> 8); + MessageBox(HWND_DESKTOP, err.data(), "DebugUtils.cleo", MB_SYSTEMMODAL | MB_ICONERROR); + } + } + + // ---------------------------------------------- event callbacks ------------------------------------------------- + + static void WINAPI OnScriptsFinalize() + { + pausedScripts.clear(); + logFiles.clear(); // close all + } + + static void WINAPI OnScriptDraw(bool beforeFade) + { + if (beforeFade) return; // skip drawing before fade pass + + OnMenuDraw(); + } + + static void WINAPI OnMenuDraw() + { + // log messages + screenLog.Draw(); + + // draw active breakpoints list + for (size_t i = 0; i < pausedScripts.size(); i++) + { + std::ostringstream ss; + ss << "Script '" << pausedScripts[i].ptr->GetName() << "' breakpoint"; + + if(!pausedScripts[i].msg.empty()) // named breakpoint + { + ss << " '" << pausedScripts[i].msg << "'"; + } + + if(i < KeyCount) + { + ss << " (F" << 5 + i << ")"; + } + + screenLog.DrawLine(ss.str().c_str(), i); + } + + // update keys state + if(!keysReleased) + { + keysReleased = true; + for (size_t i = 0; i < KeyCount; i++) + { + auto state = GetKeyState(KeyFirst + i); + if (state & 0x8000) // pressed + { + keysReleased = false; + break; + } + } + } + else // ready for next press + { + for (size_t i = 0; i < pausedScripts.size(); i++) + { + if (keysReleased && i < KeyCount) + { + auto state = GetKeyState(KeyFirst + i); + if (state & 0x8000) // pressed + { + keysReleased = false; + + std::stringstream ss; + ss << "Script breakpoint "; + if (!pausedScripts[i].msg.empty()) ss << "'" << pausedScripts[i].msg << "' "; + ss << "released in '" << pausedScripts[i].ptr->GetName() << "'"; + CLEO_Log(eLogLevel::Debug, ss.str().c_str()); + + if 
(CTimer::m_CodePause) + { + CLEO_Log(eLogLevel::Debug, "Game unpaused"); + CTimer::m_CodePause = false; + } + + pausedScripts.erase(pausedScripts.begin() + i); + + break; // breakpoint continue + } + } + } + } + } + + static bool WINAPI OnScriptProcess(CScriptThread* thread) + { + for (size_t i = 0; i < pausedScripts.size(); i++) + { + if (pausedScripts[i].ptr == thread) + { + return false; // script paused, do not process + } + } + + return true; + } + + static void WINAPI OnLog(eLogLevel level, const char* msg) + { + screenLog.Add(level, msg); + } + + // ---------------------------------------------- opcodes ------------------------------------------------- + + // 00C3=0, debug_on + static OpcodeResult WINAPI Opcode_DebugOn(CScriptThread* thread) + { + CLEO_SetScriptDebugMode(thread, true); + + return OR_CONTINUE; + } + + // 00C4=0, debug_off + static OpcodeResult WINAPI Opcode_DebugOff(CScriptThread* thread) + { + CLEO_SetScriptDebugMode(thread, false); + + return OR_CONTINUE; + } + + // 00CC=-1, breakpoint ... 
+ static OpcodeResult WINAPI Opcode_Breakpoint(CScriptThread* thread) + { + if (!CLEO_GetScriptDebugMode(thread)) + { + SkipUnusedParams(thread); + return OR_CONTINUE; + } + + bool blocking = true; // pause entire game logic + std::string name = ""; + + // bool param - blocking + auto paramType = CLEO_GetOperandType(thread); + if(paramType == DT_BYTE) + { + blocking = CLEO_GetIntOpcodeParam(thread) != 0; + } + + paramType = CLEO_GetOperandType(thread); + if (paramType == eDataType::DT_END) + { + thread->IncPtr(); // consume arguments terminator + } + else // breakpoint formatted name string + { + auto format = CLEO_ReadStringOpcodeParam(thread); + name = CLEO_ReadParamsFormatted(thread, format); + } + + pausedScripts.emplace_back(thread, name.c_str()); + + std::stringstream ss; + ss << "Script breakpoint"; + if (!name.empty()) ss << " '" << name << "'"; + ss << " captured in '" << thread->GetName() << "'"; + CLEO_Log(eLogLevel::Debug, ss.str().c_str()); + + if(blocking) + { + CLEO_Log(eLogLevel::Debug, "Game paused"); + CTimer::m_CodePause = true; + } + + return OR_CONTINUE; + } + + // 00CD=-1, trace %1s% ... + static OpcodeResult WINAPI Opcode_Trace(CScriptThread* thread) + { + if (!CLEO_GetScriptDebugMode(thread)) + { + SkipUnusedParams(thread); + return OR_CONTINUE; + } + + auto format = CLEO_ReadStringOpcodeParam(thread); + auto message = CLEO_ReadParamsFormatted(thread, format); + + CLEO_Log(eLogLevel::Debug, message); + return OR_CONTINUE; + } + + // 00CE=-1, log_to_file %1s% timestamp %2d% text %3s% ... 
+ static OpcodeResult WINAPI Opcode_LogToFile(CScriptThread* thread) + { + auto filestr = CLEO_ReadStringOpcodeParam(thread); + + std::string filename(MAX_PATH, '\0'); + const size_t len = strlen(filestr); + for(size_t i = 0; i < len; i++) + { + if(filestr[i] == '/') + filename[i] = '\\'; + else + filename[i] = std::tolower(filestr[i]); + } + CLEO_ResolvePath(thread, filename.data(), MAX_PATH); + filename.resize(strlen(filename.data())); // clip to actual cstr len + + auto it = logFiles.find(filename); + if(it == logFiles.end()) // not opened yet + { + it = logFiles.emplace(std::piecewise_construct, std::make_tuple(filename), std::make_tuple(filename, std::ios_base::app)).first; + } + + auto& file = it->second; + if(!file.good()) + { + std::ostringstream ss; + ss << "Failed to open log file '" << filename << "'"; + CLEO_Log(eLogLevel::Error, ss.str().c_str()); + + SkipUnusedParams(thread); + return OR_CONTINUE; + } + + // time stamp + if(CLEO_GetIntOpcodeParam(thread) != 0) + { + SYSTEMTIME t; + GetLocalTime(&t); + static char szBuf[64]; + sprintf(szBuf, "%02d/%02d/%04d %02d:%02d:%02d.%03d ", t.wDay, t.wMonth, t.wYear, t.wHour, t.wMinute, t.wSecond, t.wMilliseconds); + file << szBuf; + } + + auto format = CLEO_ReadStringOpcodeParam(thread); + auto message = CLEO_ReadParamsFormatted(thread, format); + + file << message << std::endl; + + return OR_CONTINUE; + } + + // 0662=1, printstring %1s% + static OpcodeResult WINAPI Opcode_PrintString(CScriptThread* thread) + { + if (!CLEO_GetScriptDebugMode(thread)) + { + CLEO_SkipOpcodeParams(thread, 1); + return OR_CONTINUE; + } + + auto text = CLEO_ReadStringOpcodeParam(thread); + + CLEO_Log(eLogLevel::Debug, text); + + return OR_CONTINUE; + } + + // 0663=1, printint %1s% %2d% + static OpcodeResult WINAPI Opcode_PrintInt(CScriptThread* thread) + { + if (!CLEO_GetScriptDebugMode(thread)) + { + CLEO_SkipOpcodeParams(thread, 2); + return OR_CONTINUE; + } + + auto text = CLEO_ReadStringOpcodeParam(thread); + auto value = 
CLEO_GetIntOpcodeParam(thread); + + std::ostringstream ss; + ss << text << ": " << value; + CLEO_Log(eLogLevel::Debug, ss.str().c_str()); + + return OR_CONTINUE; + } + + // 0664=1, printfloat %1s% %2f% + static OpcodeResult WINAPI Opcode_PrintFloat(CScriptThread* thread) + { + if (!CLEO_GetScriptDebugMode(thread)) + { + CLEO_SkipOpcodeParams(thread, 2); + return OR_CONTINUE; + } + + auto text = CLEO_ReadStringOpcodeParam(thread); + auto value = CLEO_GetFloatOpcodeParam(thread); + + std::ostringstream ss; + ss << text << ": " << value; + CLEO_Log(eLogLevel::Debug, ss.str().c_str()); + + return OR_CONTINUE; + } +} DebugUtils; + +ScreenLog DebugUtils::screenLog = {}; +std::deque DebugUtils::pausedScripts; +bool DebugUtils::keysReleased = true; +std::map DebugUtils::logFiles; + diff --git a/cleo_plugins/DebugUtils/DebugUtils.ini b/cleo_plugins/DebugUtils/DebugUtils.ini new file mode 100644 index 00000000..320557f0 --- /dev/null +++ b/cleo_plugins/DebugUtils/DebugUtils.ini @@ -0,0 +1,10 @@ +[General] +; Opcodes 0662, 0663, 0664: 0 - off, 1 - enabled +LegacyDebugOpcodes=0 + +[ScreenLog] +; Level: 0 - off, 1 - errors and warnings, 2 - debug messages, 3 - all +Level=2 +MessageTime=8000 +MessagesMax=35 +FontSize=55 diff --git a/cleo_plugins/DebugUtils/DebugUtils.vcxproj b/cleo_plugins/DebugUtils/DebugUtils.vcxproj new file mode 100644 index 00000000..bcaf5cd0 --- /dev/null +++ b/cleo_plugins/DebugUtils/DebugUtils.vcxproj @@ -0,0 +1,129 @@ + + + + + Release + Win32 + + + Debug + Win32 + + + + {481896C4-0C19-4992-9602-729537774B32} + true + Win32Proj + DebugUtils + 10.0 + + + + DynamicLibrary + false + MultiByte + v143 + true + + + DynamicLibrary + true + MultiByte + v143 + + + + + + + + + + + + + $(SolutionDir).output\ + $(ProjectDir).obj\$(Configuration)\ + DebugUtils + .cleo + + + $(SolutionDir).output\ + $(ProjectDir).obj\$(Configuration)\ + DebugUtils + .cleo + + + + Level3 + MaxSpeed + true + true + true + MultiThreaded + 
$(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared;$(PLUGIN_SDK_DIR)\shared\game;$(SolutionDir)..\cleo_sdk + _NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions) + /Zc:threadSafeInit- %(AdditionalOptions) + stdcpp17 + + + true + true + true + UseLinkTimeCodeGeneration + $(PLUGIN_SDK_DIR)\output\lib\;$(SolutionDir)..\cleo_sdk;%(AdditionalLibraryDirectories) + cleo.lib;%(AdditionalDependencies) + Windows + + + taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" +xcopy /Y "$(ProjectDir)*.ini" "$(OutDir)" +xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" + + + + + + Level3 + Disabled + true + MultiThreadedDebug + $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared;$(PLUGIN_SDK_DIR)\shared\game;$(SolutionDir)..\cleo_sdk + _DEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions) + /Zc:threadSafeInit- %(AdditionalOptions) + stdcpp17 + + + true + Default + $(PLUGIN_SDK_DIR)\output\lib\;$(SolutionDir)..\cleo_sdk;%(AdditionalLibraryDirectories) + cleo.lib;%(AdditionalDependencies) + Windows + + + taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" +xcopy /Y "$(ProjectDir)*.ini" "$(OutDir)" +xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/cleo_plugins/DebugUtils/DebugUtils.vcxproj.filters b/cleo_plugins/DebugUtils/DebugUtils.vcxproj.filters new file mode 100644 index 00000000..46219848 --- /dev/null +++ b/cleo_plugins/DebugUtils/DebugUtils.vcxproj.filters @@ -0,0 +1,34 @@ + + + + + + + sdk + + + sdk + + + sdk 
+ + + sdk + + + sdk + + + + + + + + + {7293454f-2941-4f6c-b0d3-b52d27a88286} + + + + + + \ No newline at end of file diff --git a/cleo_plugins/DebugUtils/ScreenLog.cpp b/cleo_plugins/DebugUtils/ScreenLog.cpp new file mode 100644 index 00000000..70068763 --- /dev/null +++ b/cleo_plugins/DebugUtils/ScreenLog.cpp @@ -0,0 +1,182 @@ +#include "ScreenLog.h" +#include "Utils.h" +#include "CFont.h" +#include "CTimer.h" + +ScreenLog::ScreenLog() +{ + scrollOffset = 0.0f; + + Init(); +} + +void ScreenLog::Init() +{ + // load settings from ini file + auto config = GetConfigFilename(); + + level = (eLogLevel)GetPrivateProfileInt("ScreenLog", "Level", (UINT)eLogLevel::None, config.c_str()); + maxMessages = GetPrivateProfileInt("ScreenLog", "MessagesMax", 40, config.c_str()); + timeDisplay = GetPrivateProfileInt("ScreenLog", "MessageTime", 6000, config.c_str()); + timeFadeout = 2000; + fontSize = 0.01f * GetPrivateProfileInt("ScreenLog", "FontSize", 60, config.c_str()); +} + +void ScreenLog::Add(eLogLevel level, const char* msg) +{ + if (level > this->level) + { + return; + } + + // calculate end time + auto end = DWORD(0.3f * timeDisplay); + end += DWORD(0.7f * timeDisplay * strlen(msg) / 40); // assume 40 characters as baseline + end += GetTime(); + + entries.emplace_front(level, msg, end); + + if (entries.size() > maxMessages) + { + entries.resize(maxMessages); + } + + // update scroll pos + float sizeY = fontSize * static_cast(RsGlobal.maximumHeight) / 448.0f; + size_t lines = CountLines(std::string(msg)); + scrollOffset += 18.0f * lines * sizeY; +} + +void ScreenLog::Draw() +{ + // scroll animation + static DWORD prevTime; + DWORD currTime = GetTickCount(); // game independent + if (scrollOffset > 0.001f) + { + float delta = 0.01f * (currTime - prevTime); + scrollOffset *= max(0.9f - delta, 0.0f); + } + else + scrollOffset = 0.0f; + prevTime = currTime; + + const auto now = GetTime(); // miliseconds + + // clean up expired entries + while(!entries.empty()) + { + 
if(entries.back().endTime + timeFadeout < now) + entries.pop_back(); + else + break; + } + + if (entries.empty()) + { + scrollOffset = 0.0f; + return; // nothing to print + } + + CFont::SetBackground(false, false); + CFont::SetWrapx(99999999.0f); // no line wrap + CFont::SetFontStyle(FONT_SUBTITLES); + CFont::SetEdge(1); + CFont::SetProportional(true); + + const float aspect = (float)RsGlobal.maximumWidth / RsGlobal.maximumHeight; + float sizeX = fontSize * 0.55f * RsGlobal.maximumWidth / 640.0f / aspect; + float sizeY = fontSize * RsGlobal.maximumHeight / 448.0f; + CFont::SetScale(sizeX, sizeY); + + CFont::SetOrientation(ALIGN_LEFT); + float posX = 15.0f * sizeX; + float posY = 7.0f * sizeY - scrollOffset; + + for (size_t i = 0; i < entries.size(); i++) + { + auto& entry = entries[i]; + + // carry on from any following text that is longer + auto endTime = entry.endTime; + for (size_t j = i + 1; j < entries.size(); j++) + { + endTime = max(endTime, entries[j].endTime); + } + + BYTE alpha = 255; + if (endTime < now) + { + auto elapsed = now - endTime; + float fadeProgress = (float)elapsed / timeFadeout; + fadeProgress = std::clamp(fadeProgress, 0.0f, 1.0f); + fadeProgress = 1.0f - fadeProgress; // fade out + fadeProgress = sqrtf(fadeProgress); + alpha = (BYTE)(fadeProgress * 0xFF); + } + + auto color = fontColor[(size_t)entry.level]; + alpha = min(alpha, color.a); + color.a = alpha; + + CFont::SetColor(color); + + alpha = std::clamp(int(alpha * alpha) / 255, 0, 255); // corrected for fadeout + CFont::SetDropColor(CRGBA(0, 0, 0, alpha)); + + CFont::PrintString(posX, posY, entry.msg.c_str()); + + size_t lines = CountLines(entry.msg); + posY += 18.0f * sizeY * lines; + } +} + +void ScreenLog::DrawLine(const char* msg, size_t row) +{ + CFont::SetBackground(false, false); + CFont::SetWrapx(99999999.0f); // no line wrap + CFont::SetFontStyle(FONT_SUBTITLES); + CFont::SetEdge(1); + CFont::SetProportional(true); + + const float aspect = (float)RsGlobal.maximumWidth / 
RsGlobal.maximumHeight; + float sizeX = fontSize * 0.55f * RsGlobal.maximumWidth / 640.0f / aspect; + float sizeY = fontSize * RsGlobal.maximumHeight / 448.0f; + CFont::SetScale(sizeX, sizeY); + + CFont::SetOrientation(ALIGN_RIGHT); + float posX = (float)RsGlobal.maximumWidth - 15.0f * sizeX; + + //if(FrontEndMenuManager.m_bHudOn) + float posY = 0.25f * RsGlobal.maximumHeight; + posY += 18.0f * sizeY * row; + + auto color = fontColor[(size_t)eLogLevel::Error]; + CFont::SetColor(color); + CFont::SetDropColor(CRGBA(0, 0, 0, color.a)); + + CFont::PrintString(posX, posY, msg); +} + +size_t ScreenLog::CountLines(std::string& msg) +{ + size_t lines = 1; + + size_t pos = 0; + while ((pos = msg.find("~n~", pos)) != std::string::npos) + { + lines++; + pos += 3; // pattern length + } + + lines += std::count(msg.begin(), msg.end(), '\n'); + + return lines; +} + +DWORD ScreenLog::GetTime() +{ + //return GetTickCount(); + return CTimer::m_snPreviousTimeInMillisecondsNonClipped; +} + diff --git a/cleo_plugins/DebugUtils/ScreenLog.h b/cleo_plugins/DebugUtils/ScreenLog.h new file mode 100644 index 00000000..31249293 --- /dev/null +++ b/cleo_plugins/DebugUtils/ScreenLog.h @@ -0,0 +1,73 @@ +#pragma once +#include "CLEO.h" +#include "CRGBA.h" +#include +#include + +using namespace CLEO; + +class ScreenLog +{ +public: + ScreenLog(); + + void Init(); + void Add(eLogLevel level, const char* msg); + void Draw(); + void DrawLine(const char* msg, size_t row = 0); + +private: + eLogLevel level; + size_t maxMessages; + float fontSize; + DWORD timeDisplay; + DWORD timeFadeout; + + const CRGBA fontColor[4] = { // colors for eLogLevel + CRGBA(0xDD, 0xDD, 0xDD, 0xF0), // None + CRGBA(0xFF, 0x30, 0x30, 0xF0), // Error + CRGBA(0xFF, 0xEE, 0x30, 0xF0), // User + CRGBA(0xDD, 0xDD, 0xDD, 0xF0), // Default + }; + + struct Entry + { + eLogLevel level; + std::string msg; + DWORD endTime; + + Entry() : + level(eLogLevel::Default), + msg(""), + endTime(0) + { + } + + Entry(eLogLevel level, const char* 
msg, DWORD endTime) : + level(level), + endTime(endTime) + { + if(msg != nullptr) + { + auto len = strlen(msg); + this->msg.reserve(len); + + for(size_t i = 0; i < len; i++) + { + char c = msg[i]; + + if(c == '\n') + this->msg += "~n~"; + else + this->msg.push_back(c); + } + } + } + }; + + static size_t CountLines(std::string& msg); + static DWORD GetTime(); + + std::deque entries; + float scrollOffset; +}; \ No newline at end of file diff --git a/cleo_plugins/DebugUtils/Utils.h b/cleo_plugins/DebugUtils/Utils.h new file mode 100644 index 00000000..a094906d --- /dev/null +++ b/cleo_plugins/DebugUtils/Utils.h @@ -0,0 +1,25 @@ +#pragma once +#include "CLEO.h" +#include "CFileMgr.h" +#include + +// plugin's config file +static std::string GetConfigFilename() +{ + std::string configFile = CFileMgr::ms_rootDirName; + if (!configFile.empty()) configFile += "\\"; + + configFile += "cleo\\cleo_plugins\\DebugUtils.ini"; + + return configFile; +} + +// var-args opcodes +static void SkipUnusedParams(CScriptThread* thread) +{ + while (CLEO_GetOperandType(thread) != DT_END) + CLEO_SkipOpcodeParams(thread, 1); // skip param + + thread->ReadDataByte(); // skip terminator +} + diff --git a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp index ed4a9a35..45f9395b 100644 --- a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp @@ -43,9 +43,9 @@ class FileSystemOperations } else { - std::string err(MAX_STR_LEN, '\0'); - sprintf(err.data(), "An incorrect version of CLEO (%X) was loaded. \nThis plugin requires version %X or later.", cleoVer, CLEO_VERSION); - MessageBox(HWND_DESKTOP, err.data(), "FileSystemOperations.cleo", MB_ICONERROR); + std::string err(128, '\0'); + sprintf(err.data(), "This plugin requires version %X or later! 
\nCurrent version of CLEO is %X.", CLEO_VERSION >> 8, cleoVer >> 8); + MessageBox(HWND_DESKTOP, err.data(), "FileSystemOperations.cleo", MB_SYSTEMMODAL | MB_ICONERROR); } } diff --git a/cleo_plugins/IniFiles/IniFiles.cpp b/cleo_plugins/IniFiles/IniFiles.cpp index d6f1e357..8e9e1b9d 100644 --- a/cleo_plugins/IniFiles/IniFiles.cpp +++ b/cleo_plugins/IniFiles/IniFiles.cpp @@ -22,9 +22,9 @@ class IniFiles } else { - std::string err(MAX_STR_LEN, '\0'); - sprintf(err.data(), "An incorrect version of CLEO (%X) was loaded. \nThis plugin requires version %X or later.", cleoVer, CLEO_VERSION); - MessageBox(HWND_DESKTOP, err.data(), "IniFiles.cleo", MB_ICONERROR); + std::string err(128, '\0'); + sprintf(err.data(), "This plugin requires version %X or later! \nCurrent version of CLEO is %X.", CLEO_VERSION >> 8, cleoVer >> 8); + MessageBox(HWND_DESKTOP, err.data(), "IniFiles.cleo", MB_SYSTEMMODAL | MB_ICONERROR); } } diff --git a/cleo_plugins/IntOperations/IntOperations.cpp b/cleo_plugins/IntOperations/IntOperations.cpp index d3b4363a..b14f436f 100644 --- a/cleo_plugins/IntOperations/IntOperations.cpp +++ b/cleo_plugins/IntOperations/IntOperations.cpp @@ -30,9 +30,9 @@ class IntOperations } else { - std::string err(MAX_STR_LEN, '\0'); - sprintf(err.data(), "An incorrect version of CLEO (%X) was loaded. \nThis plugin requires version %X or later.", cleoVer, CLEO_VERSION); - MessageBox(HWND_DESKTOP, err.data(), "IniFiles.cleo", MB_ICONERROR); + std::string err(128, '\0'); + sprintf(err.data(), "This plugin requires version %X or later! 
\nCurrent version of CLEO is %X.", CLEO_VERSION >> 8, cleoVer >> 8); + MessageBox(HWND_DESKTOP, err.data(), "IntOperations.cleo", MB_SYSTEMMODAL | MB_ICONERROR); } } diff --git a/cleo_sdk/CLEO.cpp b/cleo_sdk/CLEO.cpp deleted file mode 100644 index 8338f05e..00000000 --- a/cleo_sdk/CLEO.cpp +++ /dev/null @@ -1,129 +0,0 @@ -#include "CLEO.h" - -namespace CLEO -{ - -#ifdef __cplusplus -CRunningScript::CRunningScript() -{ - strcpy(Name, "noname"); - BaseIP = 0; - Previous = 0; - Next = 0; - CurrentIP = 0; - memset(Stack, 0, sizeof(Stack)); - SP = 0; - WakeTime = 0; - bIsActive = 0; - bCondResult = 0; - bUseMissionCleanup = 0; - bIsExternal = 0; - bTextBlockOverride = 0; - bExternalType = -1; - memset(LocalVar, 0, sizeof(LocalVar)); - LogicalOp = eLogicalOperation::NONE; - NotFlag = 0; - bWastedBustedCheck = 1; - bWastedOrBusted = 0; - SceneSkipIP = 0; - bIsMission = 0; - ScmFunction = 0; - bIsCustom = 0; -} - -bool CRunningScript::IsActive() const { return bIsActive; } - -bool CRunningScript::IsExternal() const { return bIsExternal; } - -bool CRunningScript::IsMission() const { return bIsMission; } - -bool CRunningScript::IsCustom() const { return bIsCustom; } - -const char* CRunningScript::GetName() const { return Name; } - -BYTE* CRunningScript::GetBasePointer() const { return (BYTE*)BaseIP; } - -BYTE* CRunningScript::GetBytePointer() const { return CurrentIP; } - -void CRunningScript::SetIp(void* ip) { CurrentIP = (BYTE*)ip; } - -void CRunningScript::SetBaseIp(void* ip) { BaseIP = ip; } - -CRunningScript* CRunningScript::GetNext() const { return Next; } - -CRunningScript* CRunningScript::GetPrev() const { return Previous; } - -void CRunningScript::SetIsExternal(bool b) { bIsExternal = b; } - -void CRunningScript::SetActive(bool b) { bIsActive = b; } - -void CRunningScript::SetNext(CRunningScript* v) { Next = v; } - -void CRunningScript::SetPrev(CRunningScript* v) { Previous = v; } - -SCRIPT_VAR* CRunningScript::GetVarPtr() { return LocalVar; } - -SCRIPT_VAR* 
CRunningScript::GetVarPtr(int i) { return &LocalVar[i]; } - -int* CRunningScript::GetIntVarPtr(int i) { return (int*)&LocalVar[i].dwParam; } - -int CRunningScript::GetIntVar(int i) const { return LocalVar[i].dwParam; } - -void CRunningScript::SetIntVar(int i, int v) { LocalVar[i].dwParam = v; } - -void CRunningScript::SetFloatVar(int i, float v) { LocalVar[i].fParam = v; } - -char CRunningScript::GetByteVar(int i) const { return LocalVar[i].bParam; } - -bool CRunningScript::GetConditionResult() const { return bCondResult != 0; } - -bool CRunningScript::CRunningScript::GetNotFlag() const { return NotFlag; } - -void CRunningScript::CRunningScript::SetNotFlag(bool state) { NotFlag = state; } - -char CRunningScript::ReadDataType() { return ReadDataByte(); } - -short CRunningScript::ReadDataVarIndex() { return ReadDataWord(); } - -short CRunningScript::ReadDataArrayOffset() { return ReadDataWord(); } - -short CRunningScript::ReadDataArrayIndex() { return ReadDataWord(); } - -short CRunningScript::ReadDataArraySize() { return ReadDataByte(); } - -short CRunningScript::ReadDataArrayFlags() { return ReadDataByte(); } - -void CRunningScript::IncPtr(int n) { CurrentIP += n; } - -int CRunningScript::ReadDataByte() -{ - char b = *CurrentIP; - ++CurrentIP; - return b; -} - -short CRunningScript::ReadDataWord() -{ - short v = *(short*)CurrentIP; - CurrentIP += 2; - return v; -} - -int CRunningScript::ReadDataInt() -{ - int i = *(int*)CurrentIP; - CurrentIP += 4; - return i; -} - -void CRunningScript::PushStack(BYTE* ptr) { Stack[SP++] = ptr; } - -BYTE* CRunningScript::PopStack() { return Stack[--SP]; } - -WORD CRunningScript::GetScmFunction() const { return ScmFunction; } - -void CRunningScript::SetScmFunction(WORD id) { ScmFunction = id; } - -#endif // __cplusplus - -} // CLEO namespace diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index 52ba7a1d..5dec09d6 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -121,15 +121,26 @@ const char DIR_CLEO[] = "3:"; // game\cleo 
directory const char DIR_MODULES[] = "4:"; // game\cleo\modules directory // argument of CLEO_RegisterCallback -enum class eCallbackId +enum class eCallbackId : DWORD { ScmInit1, // void WINAPI OnScmInit1(); ScmInit2, // void WINAPI OnScmInit2(); ScmInit3, // void WINAPI OnScmInit3(); ScriptsLoaded, // void WINAPI OnScriptsLoaded(); ScriptsFinalize, // void WINAPI OnScriptsFinalize(); - ScriptProcess, // bool WINAPI OnScriptProcess(CRunningScript* pScript, int); // return false to skip this script processing + ScriptProcess, // bool WINAPI OnScriptProcess(CRunningScript* pScript); // return false to skip this script processing ScriptDraw, // void WINAPI OnScriptDraw(bool beforeFade); + MenuDraw, // void WINAPI OnMenuDraw(); + Log, // void OnLog(eLogLevel level, const char* msg); +}; + +// used by CLEO_Log and Log callback +enum class eLogLevel : DWORD +{ + None, + Error, // errors and warnings + Debug, // debug mode related + Default // all }; typedef int SCRIPT_HANDLE; @@ -177,53 +188,78 @@ struct CRunningScript #ifdef __cplusplus public: - CRunningScript(); - - bool IsActive() const; - bool IsExternal() const; - bool IsMission() const; - bool IsCustom() const; // is this CLEO Script? 
- const char* GetName() const; - BYTE* GetBasePointer() const; - BYTE* GetBytePointer() const; - void SetIp(void* ip); - void SetBaseIp(void* ip); - CRunningScript* GetNext() const; - CRunningScript* GetPrev() const; - void SetIsExternal(bool b); - void SetActive(bool b); - void SetNext(CRunningScript* v); - void SetPrev(CRunningScript* v); - SCRIPT_VAR* GetVarPtr(); - SCRIPT_VAR* GetVarPtr(int i); - int* GetIntVarPtr(int i); - int GetIntVar(int i) const; - void SetIntVar(int i, int v); - void SetFloatVar(int i, float v); - char GetByteVar(int i) const; - bool GetConditionResult() const; - bool GetNotFlag() const; - void SetNotFlag(bool state); - - char ReadDataType(); - short ReadDataVarIndex(); - short ReadDataArrayOffset(); - short ReadDataArrayIndex(); - short ReadDataArraySize(); - short ReadDataArrayFlags(); + CRunningScript() + { + strcpy(Name, "noname"); + BaseIP = 0; + Previous = 0; + Next = 0; + CurrentIP = 0; + memset(Stack, 0, sizeof(Stack)); + SP = 0; + WakeTime = 0; + bIsActive = 0; + bCondResult = 0; + bUseMissionCleanup = 0; + bIsExternal = 0; + bTextBlockOverride = 0; + bExternalType = -1; + memset(LocalVar, 0, sizeof(LocalVar)); + LogicalOp = eLogicalOperation::NONE; + NotFlag = 0; + bWastedBustedCheck = 1; + bWastedOrBusted = 0; + SceneSkipIP = 0; + bIsMission = 0; + ScmFunction = 0; + bIsCustom = 0; + } + + bool IsActive() const { return bIsActive; } + bool IsExternal() const { return bIsExternal; } + bool IsMission() const { return bIsMission; } + bool IsCustom() const { return bIsCustom; } // is this CLEO Script? 
+ const char* GetName() const { return Name; } + BYTE* GetBasePointer() const { return (BYTE*)BaseIP; } + BYTE* GetBytePointer() const { return CurrentIP; } + void SetIp(void* ip) { CurrentIP = (BYTE*)ip; } + void SetBaseIp(void* ip) { BaseIP = ip; } + CRunningScript* GetNext() const { return Next; } + CRunningScript* GetPrev() const { return Previous; } + void SetIsExternal(bool b) { bIsExternal = b; } + void SetActive(bool b) { bIsActive = b; } + void SetNext(CRunningScript* v) { Next = v; } + void SetPrev(CRunningScript* v) { Previous = v; } + SCRIPT_VAR* GetVarPtr() { return LocalVar; } + SCRIPT_VAR* GetVarPtr(int i) { return &LocalVar[i]; } + int* GetIntVarPtr(int i) { return (int*)&LocalVar[i].dwParam; } + int GetIntVar(int i) const { return LocalVar[i].dwParam; } + void SetIntVar(int i, int v) { LocalVar[i].dwParam = v; } + void SetFloatVar(int i, float v) { LocalVar[i].fParam = v; } + char GetByteVar(int i) const { return LocalVar[i].bParam; } + bool GetConditionResult() const { return bCondResult != 0; } + bool GetNotFlag() const { return NotFlag; } + void SetNotFlag(bool state) { NotFlag = state; } + + char ReadDataType() { return ReadDataByte(); } + short ReadDataVarIndex() { return ReadDataWord(); } + short ReadDataArrayOffset() { return ReadDataWord(); } + short ReadDataArrayIndex() { return ReadDataWord(); } + short ReadDataArraySize() { return ReadDataByte(); } + short ReadDataArrayFlags() { return ReadDataByte(); } - void IncPtr(int n = 1); - int ReadDataByte(); - short ReadDataWord(); - int ReadDataInt(); + void IncPtr(int n = 1) { CurrentIP += n; } + int ReadDataByte() { char b = *CurrentIP; ++CurrentIP; return b; } + short ReadDataWord() { short v = *(short*)CurrentIP; CurrentIP += 2; return v; } + int ReadDataInt() { int i = *(int*)CurrentIP; CurrentIP += 4; return i; } - void PushStack(BYTE* ptr); - BYTE* PopStack(); + void PushStack(BYTE* ptr) { Stack[SP++] = ptr; } + BYTE* PopStack() { return Stack[--SP]; } - WORD GetScmFunction() const; - 
void SetScmFunction(WORD id); + WORD GetScmFunction() const { return ScmFunction; } + void SetScmFunction(WORD id) { ScmFunction = id; } - #endif // __cplusplus +#endif // __cplusplus }; #pragma pack(pop) static_assert(sizeof(CRunningScript) == 0xE0, "Invalid size of CRunningScript!"); @@ -238,7 +274,8 @@ static_assert(sizeof(CRunningScript) == 0xE0, "Invalid size of CRunningScript!") enum OpcodeResult : char { OR_CONTINUE = 0, - OR_INTERRUPT = 1 + OR_INTERRUPT = 1, + OR_ERROR = -1, }; typedef OpcodeResult (CALLBACK* _pOpcodeHandler)(CRunningScript*); @@ -261,9 +298,11 @@ float WINAPI CLEO_GetFloatOpcodeParam(CRunningScript* thread); void WINAPI CLEO_SetIntOpcodeParam(CRunningScript* thread, DWORD value); void WINAPI CLEO_SetFloatOpcodeParam(CRunningScript* thread, float value); -LPSTR WINAPI CLEO_ReadStringOpcodeParam(CRunningScript* thread, LPSTR buf, int size); -LPSTR WINAPI CLEO_ReadStringPointerOpcodeParam(CRunningScript* thread, LPSTR buf, int size); // exactly same as CLEO_ReadStringOpcodeParam -void WINAPI CLEO_WriteStringOpcodeParam(CRunningScript* thread, LPCSTR str); +LPSTR WINAPI CLEO_ReadStringOpcodeParam(CRunningScript* thread, char* buf = nullptr, int size = 0); +LPSTR WINAPI CLEO_ReadStringPointerOpcodeParam(CRunningScript* thread, char* buf = nullptr, int size = 0); // exactly same as CLEO_ReadStringOpcodeParam +void WINAPI CLEO_WriteStringOpcodeParam(CRunningScript* thread, const char* str); + +char* WINAPI CLEO_ReadParamsFormatted(CRunningScript* thread, const char* format, char* buf = nullptr, int size = 0); void WINAPI CLEO_SetThreadCondResult(CRunningScript* thread, BOOL result); @@ -295,8 +334,14 @@ void WINAPI CLEO_AddScriptDeleteDelegate(FuncScriptDeleteDelegateT func); void WINAPI CLEO_RemoveScriptDeleteDelegate(FuncScriptDeleteDelegateT func); -// convert to absolute file path -void WINAPI CLEO_ResolvePath(CRunningScript* thread, char* inOutPath, DWORD pathMaxLen); +void WINAPI CLEO_ResolvePath(CRunningScript* thread, char* inOutPath, 
DWORD pathMaxLen); // convert to absolute (file system) path + +BOOL WINAPI CLEO_GetScriptDebugMode(const CRunningScript* thread); // debug mode features enabled for this script? +void WINAPI CLEO_SetScriptDebugMode(CRunningScript* thread, BOOL enabled); + +void WINAPI CLEO_Log(eLogLevel level, const char* msg); // add message to log + +void WINAPI CLEO_GetScriptInfoStr(CRunningScript* thread, bool currLineInfo, char* buf, DWORD bufSize); // short text for displaying in error\log messages #ifdef __cplusplus } diff --git a/cleo_sdk/CLEO.lib b/cleo_sdk/CLEO.lib index 62d84b62575b0c0d542550b1e1b2458a4f6e30ff..25791ef2cfc7d77359a137a1b2bf7606095e5f5c 100644 GIT binary patch delta 879 zcmaLW%WD%+6bA4w)5bgs+Poi=cYQ=~(Uck#RGRuKHK5H#D3}Mdh(c_emQb(~#En$Y zz*Xont1hGqb#_5Yy3;HQW?e+lMd@FVO^M&+Hicp}@XMLx+;isMGfd+~`*!tmC2?^i z(*5ME08)T$8gMKF&JDo5D|xa&*ES>hl9I|AT_7U`Cjnh(SPHKKx(Hi$l^Jz=QZ!!y zqIqgbu?e87!AL{#164-ytxJI>Z%F=_w8}+01NV=3b#xC@$WbD-{b%OS3a$ ziR6*x$1f3A%_EgW@^JV(2&QeovIE#~0=sS?>jAbY=>x1Pkm2qpgTQbISPiov0+eZw z@=>5g6KvF=_z+N~b=Edn7xM$-UZBLjb;h=fldQ2(fQ{Ce++^c2Z?Z!v(J`xiQN+ZQ zB`A`XfPSq_St^Fu!8rbNc=1MwiopSsc&vuBQv+&W{O=L|V9#K=yh|k8;+Ov}> z#d4v(&^}=R%);kPiv7X){us7a#DeX#YsdQ#KB3_K@Isi-nu`*!n z0Pz-(i262|o;M{m3wY8)zB-efEk%?7Pv%cQcK#AvEXMkc4a7u+N(4aJ%R4LpGtkMl#vesbD zv<<8zftm%}GX-<-iH(-osL4g0WBwEfq!EM$9X1?)LD!o_!s^%iZpS-~h(f<(}opc{3{4jC|YJy1Q=}0qwEbeS0il&8)nO kFa6(Ua*JFuOToGCsZaT0{`KhhYA>t@O_ushwCharacteristics & IMAGE_SCN_MEM_EXECUTE) ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE; if (!VirtualProtect(pImageBase + pSection->VirtualAddress, dwPhysSize, newProtect, &oldProtect)) - Error("Virtual protect error"); + SHOW_ERROR("Virtual protect error"); } } @@ -60,7 +60,7 @@ namespace CLEO ); DWORD oldProtect, newProtect = (pSection->Characteristics & IMAGE_SCN_MEM_EXECUTE) ? 
PAGE_EXECUTE_READWRITE : PAGE_READWRITE; if (!VirtualProtect(pImageBase + pSection->VirtualAddress, dwPhysSize, newProtect, &oldProtect)) - Error("Virtual protect error"); + SHOW_ERROR("Virtual protect error"); } } diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 0d9de559..b65e0e4f 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -120,32 +120,6 @@ namespace CLEO { OpcodeResult __stdcall opcode_0DD5(CRunningScript* thread); // get_platform - CustomOpcodeHandler customOpcodeHandlers[100] = - { - opcode_0A8C, opcode_0A8D, opcode_0A8E, opcode_0A8F, opcode_0A90, - opcode_0A91, opcode_0A92, opcode_0A93, opcode_0A94, opcode_0A95, - opcode_0A96, opcode_0A97, opcode_0A98, opcode_0A99, opcode_0A9A, - opcode_0A9B, opcode_0A9C, opcode_0A9D, opcode_0A9E, opcode_0A9F, - opcode_0AA0, opcode_0AA1, opcode_0AA2, opcode_0AA3, opcode_0AA4, - opcode_0AA5, opcode_0AA6, opcode_0AA7, opcode_0AA8, opcode_0AA9, - opcode_0AAA, nullptr, opcode_0AAC, opcode_0AAD, opcode_0AAE, - opcode_0AAF, opcode_0AB0, opcode_0AB1, opcode_0AB2, opcode_0AB3, - opcode_0AB4, opcode_0AB5, opcode_0AB6, opcode_0AB7, opcode_0AB8, - opcode_0AB9, opcode_0ABA, opcode_0ABB, opcode_0ABC, opcode_0ABD, - opcode_0ABE, opcode_0ABF, opcode_0AC0, opcode_0AC1, opcode_0AC2, - opcode_0AC3, opcode_0AC4, opcode_0AC5, opcode_0AC6, opcode_0AC7, - opcode_0AC8, opcode_0AC9, opcode_0ACA, opcode_0ACB, opcode_0ACC, - opcode_0ACD, opcode_0ACE, opcode_0ACF, opcode_0AD0, opcode_0AD1, - opcode_0AD2, opcode_0AD3, opcode_0AD4, opcode_0AD5, opcode_0AD6, - opcode_0AD7, opcode_0AD8, opcode_0AD9, opcode_0ADA, opcode_0ADB, - opcode_0ADC, opcode_0ADD, opcode_0ADE, opcode_0ADF, opcode_0AE0, - opcode_0AE1, opcode_0AE2, opcode_0AE3, nullptr, nullptr, - nullptr, nullptr, nullptr, opcode_0AE9, opcode_0AEA, - opcode_0AEB, opcode_0AEC, opcode_0AED, opcode_0AEE, opcode_0AEF, - }; - - typedef OpcodeResult(__thiscall *_OpcodeHandler)(CRunningScript *thread, unsigned short opcode); - typedef 
void(*FuncScriptDeleteDelegateT) (CRunningScript *script); struct ScriptDeleteDelegate { std::vector funcs; @@ -156,10 +130,6 @@ namespace CLEO { ScriptDeleteDelegate scriptDeleteDelegate; void RunScriptDeleteDelegate(CRunningScript *script) { scriptDeleteDelegate(script); } - _OpcodeHandler *oldOpcodeHandlerTable; - _OpcodeHandler newOpcodeHandlerTable[329]; - CustomOpcodeHandler extraOpcodeHandlers[100][300]; - CBuildingPool **buildingPool = nullptr; // add for future CLEO releases CVehiclePool **vehiclePool = nullptr; CObjectPool **objectPool = nullptr; @@ -191,59 +161,52 @@ namespace CLEO { CRunningScript * last_script; ptrdiff_t last_off = -1; - // opcode handler for opcodes, defined by user with cleo api - OpcodeResult __fastcall extraOpcodeHandler(CRunningScript *thread, int dummy, unsigned short opcode) - { - last_custom_opcode = opcode; - last_script = thread; - return extraOpcodeHandlers[opcode % 100][opcode / 100 - 28](thread); - } - // opcode handler for custom opcodes - OpcodeResult __fastcall customOpcodeHandler(CRunningScript *thread, int dummy, unsigned short opcode) + OpcodeResult __fastcall CCustomOpcodeSystem::customOpcodeHandler(CRunningScript *thread, int dummy, WORD opcode) { - auto handler = customOpcodeHandlers[opcode - 0x0A8C]; - if (handler != nullptr) + /*std::ostringstream ss; + ss << thread->GetName() << " opcode " << opcodeToStr(opcode) << std::endl; + OutputDebugStringA(ss.str().c_str());//*/ + + if(opcode > LastCustomOpcode) + { + SHOW_ERROR("Opcode [%04X] out of supported range! 
\nCalled in script %s\nScript suspended.", opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return ErrorSuspendScript(thread); + } + + CustomOpcodeHandler handler = customOpcodeProc[opcode]; + if(handler != nullptr) { last_custom_opcode = opcode; last_script = thread; return handler(thread); } - // try with extra handlers then - return extraOpcodeHandler(thread, dummy, opcode); - } - - char ScriptExecutionLoop() - { - CCustomScript *thread; - OpcodeResult res; + // Not registered as custom opcode. Call game's original handler - _asm mov thread, esi + if (opcode > LastOriginalOpcode) + { + SHOW_ERROR("Opcode [%04X] not registered! \nCalled in script %s\nScript suspended.", opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return ErrorSuspendScript(thread); + } - last_script = thread; + size_t tableIdx = opcode / 100; // 100 opcodes peer handler table + auto result = originalOpcodeHandlers[tableIdx](thread, opcode); - try + if(result == OR_ERROR) { - do - { - ptrdiff_t off = thread->IsCustom() ? thread->GetBytePointer() - thread->GetBasePointer() : thread->GetBytePointer() - scmBlock; - WORD opcode = thread->ReadDataWord(); - last_opcode = opcode; - last_off = off; - memcpy(last_thread, thread->GetName(), 8); - thread->SetNotFlag((opcode & 0x8000) != 0); - opcode &= 0x7FFF; - res = newOpcodeHandlerTable[opcode / 100](thread, opcode); - } while (res == OR_CONTINUE); - } - catch (const char * e) - { - char str[MAX_STR_LEN]; - sprintf(str, "%s encountered while parsing opcode '%04X' in script '%s'", e, last_opcode, last_thread); - Error(str); + SHOW_ERROR("Opcode [%04X] not found! 
\nCalled in script %s\nScript suspended.", opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return ErrorSuspendScript(thread); } - return 0; + + return result; + } + + OpcodeResult CCustomOpcodeSystem::ErrorSuspendScript(CRunningScript* thread) + { + //thread->SetActive(false): // will crash game if no active script left + ((CCustomScript*)thread)->WakeTime = 0xFFFFFFFF; + return OpcodeResult::OR_INTERRUPT; } void CCustomOpcodeSystem::FinalizeScriptObjects() @@ -278,28 +241,126 @@ namespace CLEO { m_pAllocations.clear(); } + CCustomOpcodeSystem::CCustomOpcodeSystem() + { + // register CLEO opcodes + CLEO_RegisterOpcode(0x0A8C, opcode_0A8C); + CLEO_RegisterOpcode(0x0A8D, opcode_0A8D); + CLEO_RegisterOpcode(0x0A8E, opcode_0A8E); + CLEO_RegisterOpcode(0x0A8F, opcode_0A8F); + CLEO_RegisterOpcode(0x0A90, opcode_0A90); + CLEO_RegisterOpcode(0x0A91, opcode_0A91); + CLEO_RegisterOpcode(0x0A92, opcode_0A92); + CLEO_RegisterOpcode(0x0A93, opcode_0A93); + CLEO_RegisterOpcode(0x0A94, opcode_0A94); + CLEO_RegisterOpcode(0x0A95, opcode_0A95); + CLEO_RegisterOpcode(0x0A96, opcode_0A96); + CLEO_RegisterOpcode(0x0A97, opcode_0A97); + CLEO_RegisterOpcode(0x0A98, opcode_0A98); + CLEO_RegisterOpcode(0x0A99, opcode_0A99); + CLEO_RegisterOpcode(0x0A9A, opcode_0A9A); + CLEO_RegisterOpcode(0x0A9B, opcode_0A9B); + CLEO_RegisterOpcode(0x0A9C, opcode_0A9C); + CLEO_RegisterOpcode(0x0A9D, opcode_0A9D); + CLEO_RegisterOpcode(0x0A9E, opcode_0A9E); + CLEO_RegisterOpcode(0x0A9F, opcode_0A9F); + CLEO_RegisterOpcode(0x0AA0, opcode_0AA0); + CLEO_RegisterOpcode(0x0AA1, opcode_0AA1); + CLEO_RegisterOpcode(0x0AA2, opcode_0AA2); + CLEO_RegisterOpcode(0x0AA3, opcode_0AA3); + CLEO_RegisterOpcode(0x0AA4, opcode_0AA4); + CLEO_RegisterOpcode(0x0AA5, opcode_0AA5); + CLEO_RegisterOpcode(0x0AA6, opcode_0AA6); + CLEO_RegisterOpcode(0x0AA7, opcode_0AA7); + CLEO_RegisterOpcode(0x0AA8, opcode_0AA8); + CLEO_RegisterOpcode(0x0AA9, opcode_0AA9); + CLEO_RegisterOpcode(0x0AAA, opcode_0AAA); + 
CLEO_RegisterOpcode(0x0AAC, opcode_0AAC); + CLEO_RegisterOpcode(0x0AAD, opcode_0AAD); + CLEO_RegisterOpcode(0x0AAE, opcode_0AAE); + CLEO_RegisterOpcode(0x0AAF, opcode_0AAF); + CLEO_RegisterOpcode(0x0AB0, opcode_0AB0); + CLEO_RegisterOpcode(0x0AB1, opcode_0AB1); + CLEO_RegisterOpcode(0x0AB2, opcode_0AB2); + CLEO_RegisterOpcode(0x0AB3, opcode_0AB3); + CLEO_RegisterOpcode(0x0AB4, opcode_0AB4); + CLEO_RegisterOpcode(0x0AB5, opcode_0AB5); + CLEO_RegisterOpcode(0x0AB6, opcode_0AB6); + CLEO_RegisterOpcode(0x0AB7, opcode_0AB7); + CLEO_RegisterOpcode(0x0AB8, opcode_0AB8); + CLEO_RegisterOpcode(0x0AB9, opcode_0AB9); + CLEO_RegisterOpcode(0x0ABA, opcode_0ABA); + CLEO_RegisterOpcode(0x0ABB, opcode_0ABB); + CLEO_RegisterOpcode(0x0ABC, opcode_0ABC); + CLEO_RegisterOpcode(0x0ABD, opcode_0ABD); + CLEO_RegisterOpcode(0x0ABE, opcode_0ABE); + CLEO_RegisterOpcode(0x0ABF, opcode_0ABF); + CLEO_RegisterOpcode(0x0AC0, opcode_0AC0); + CLEO_RegisterOpcode(0x0AC1, opcode_0AC1); + CLEO_RegisterOpcode(0x0AC2, opcode_0AC2); + CLEO_RegisterOpcode(0x0AC3, opcode_0AC3); + CLEO_RegisterOpcode(0x0AC4, opcode_0AC4); + CLEO_RegisterOpcode(0x0AC5, opcode_0AC5); + CLEO_RegisterOpcode(0x0AC6, opcode_0AC6); + CLEO_RegisterOpcode(0x0AC7, opcode_0AC7); + CLEO_RegisterOpcode(0x0AC8, opcode_0AC8); + CLEO_RegisterOpcode(0x0AC9, opcode_0AC9); + CLEO_RegisterOpcode(0x0ACA, opcode_0ACA); + CLEO_RegisterOpcode(0x0ACB, opcode_0ACB); + CLEO_RegisterOpcode(0x0ACC, opcode_0ACC); + CLEO_RegisterOpcode(0x0ACD, opcode_0ACD); + CLEO_RegisterOpcode(0x0ACE, opcode_0ACE); + CLEO_RegisterOpcode(0x0ACF, opcode_0ACF); + CLEO_RegisterOpcode(0x0AD0, opcode_0AD0); + CLEO_RegisterOpcode(0x0AD1, opcode_0AD1); + CLEO_RegisterOpcode(0x0AD2, opcode_0AD2); + CLEO_RegisterOpcode(0x0AD3, opcode_0AD3); + CLEO_RegisterOpcode(0x0AD4, opcode_0AD4); + CLEO_RegisterOpcode(0x0AD5, opcode_0AD5); + CLEO_RegisterOpcode(0x0AD6, opcode_0AD6); + CLEO_RegisterOpcode(0x0AD7, opcode_0AD7); + CLEO_RegisterOpcode(0x0AD8, opcode_0AD8); + 
CLEO_RegisterOpcode(0x0AD9, opcode_0AD9); + CLEO_RegisterOpcode(0x0ADA, opcode_0ADA); + CLEO_RegisterOpcode(0x0ADB, opcode_0ADB); + CLEO_RegisterOpcode(0x0ADC, opcode_0ADC); + CLEO_RegisterOpcode(0x0ADD, opcode_0ADD); + CLEO_RegisterOpcode(0x0ADE, opcode_0ADE); + CLEO_RegisterOpcode(0x0ADF, opcode_0ADF); + CLEO_RegisterOpcode(0x0AE0, opcode_0AE0); + CLEO_RegisterOpcode(0x0AE1, opcode_0AE1); + CLEO_RegisterOpcode(0x0AE2, opcode_0AE2); + CLEO_RegisterOpcode(0x0AE3, opcode_0AE3); + CLEO_RegisterOpcode(0x0AE9, opcode_0AE9); + CLEO_RegisterOpcode(0x0AEA, opcode_0AEA); + CLEO_RegisterOpcode(0x0AEB, opcode_0AEB); + CLEO_RegisterOpcode(0x0AEC, opcode_0AEC); + CLEO_RegisterOpcode(0x0AED, opcode_0AED); + CLEO_RegisterOpcode(0x0AEE, opcode_0AEE); + CLEO_RegisterOpcode(0x0AEF, opcode_0AEF); + CLEO_RegisterOpcode(0x0DD5, opcode_0DD5); // get_platform + } + void CCustomOpcodeSystem::Inject(CCodeInjector& inj) { TRACE("Injecting CustomOpcodeSystem..."); CGameVersionManager& gvm = GetInstance().VersionManager; - oldOpcodeHandlerTable = gvm.TranslateMemoryAddress(MA_OPCODE_HANDLER); - - // add handler for custom opcodes - oldOpcodeHandlerTable[27] = reinterpret_cast<_OpcodeHandler>(customOpcodeHandler); - // replace old OpcodeHandlerTable with the new one - //inj.MemoryWrite(gvm.TranslateMemoryAddress(MA_OPCODE_HANDLER_REF), reinterpret_cast<_OpcodeHandler>(&newOpcodeHandlerTable[0])); - MemWrite(gvm.TranslateMemoryAddress(MA_OPCODE_HANDLER_REF), reinterpret_cast<_OpcodeHandler>(&newOpcodeHandlerTable[0])); - - // copy old table to the new - //std::copy(oldOpcodeHandlerTable, oldOpcodeHandlerTable + 28, newOpcodeHandlerTable); - MemCopy<_OpcodeHandler>(newOpcodeHandlerTable, oldOpcodeHandlerTable, (&oldOpcodeHandlerTable[28] - oldOpcodeHandlerTable) * 4); - //MemCopy(newOpcodeHandlerTable, oldOpcodeHandlerTable, oldOpcodeHandlerTable - (oldOpcodeHandlerTable + 28)); - - // fill the rest with default handler - std::fill(newOpcodeHandlerTable + 28, newOpcodeHandlerTable + 329, 
reinterpret_cast<_OpcodeHandler>(extraOpcodeHandler)); + // replace all handlers in original table + // store original opcode handlers for later use + _OpcodeHandler* handlersTable = gvm.TranslateMemoryAddress(MA_OPCODE_HANDLER); + for(size_t i = 0; i < OriginalOpcodeHandlersCount; i++) + { + originalOpcodeHandlers[i] = handlersTable[i]; + handlersTable[i] = (_OpcodeHandler)customOpcodeHandler; + } - CLEO_RegisterOpcode(0x0DD5, opcode_0DD5); // get_platform + // initialize and apply new handlers table + for (size_t i = 0; i < CustomOpcodeHandlersCount; i++) + { + customOpcodeHandlers[i] = (_OpcodeHandler)customOpcodeHandler; + } + MemWrite(gvm.TranslateMemoryAddress(MA_OPCODE_HANDLER_REF), &customOpcodeHandlers); FUNC_fopen = gvm.TranslateMemoryAddress(MA_FOPEN_FUNCTION); FUNC_fclose = gvm.TranslateMemoryAddress(MA_FCLOSE_FUNCTION); @@ -328,21 +389,39 @@ namespace CLEO { SpawnCar = gvm.TranslateMemoryAddress(MA_SPAWN_CAR_FUNCTION); // TODO: consider version-agnostic code - if (gvm.GetGameVersion() == GV_US10) { + if (gvm.GetGameVersion() == GV_US10) + { // make it compatible with fastman92's limit adjuster (only required for 1.0 US) RadarBlips = injector::ReadMemory(0x583A05 + 2, true); } - else { + else + { RadarBlips = gvm.TranslateMemoryAddress(MA_RADAR_BLIPS); } + } + + CCustomOpcodeSystem::_OpcodeHandler CCustomOpcodeSystem::originalOpcodeHandlers[OriginalOpcodeHandlersCount]; + CCustomOpcodeSystem::_OpcodeHandler CCustomOpcodeSystem::customOpcodeHandlers[CustomOpcodeHandlersCount]; + CustomOpcodeHandler CCustomOpcodeSystem::customOpcodeProc[LastCustomOpcode + 1]; + + bool CCustomOpcodeSystem::RegisterOpcode(WORD opcode, CustomOpcodeHandler callback) + { + if (opcode > LastCustomOpcode) + { + SHOW_ERROR("Can not register [%04X] opcode! 
Out of supported range.", opcode); + return false; + } - /*if(gvm.GetGameVersion() == GV_US10) + CustomOpcodeHandler& dst = customOpcodeProc[opcode]; + if (*dst != nullptr) { - inj.Nop(0x469FB0, 0x469FFB - 0x469FB0); - inj.ReplaceFunction(ScriptExecutionLoop, 0x469FF6); - inj.Nop(0x469FF2, 0x469FFB - 0x469FF2); - inj.ReplaceFunction(ScriptExecutionLoop, 0x469FF6); - }*/ + LOG_WARNING("Opcode [%04X] already registered! Skipping.", opcode); + return false; + } + + dst = callback; + TRACE("Opcode [%04X] registered", opcode); + return true; } inline CRunningScript& operator>>(CRunningScript& thread, DWORD& uval) @@ -512,9 +591,7 @@ namespace CLEO { // unsupported param type GetScriptParams(thread, 1); // skip unhandled param - std::string err(MAX_STR_LEN, '\0'); - sprintf(err.data(), "Reading string from invalid argument type (%02X) in script '%s'", paramType, reinterpret_cast(thread)->GetScriptFileName()); - Error(err.data()); + SHOW_ERROR("Reading string from invalid argument type (%02X) in script %s", paramType, ((CCustomScript*)thread)->GetInfoStr().c_str()); return nullptr; } @@ -561,9 +638,7 @@ namespace CLEO { default: { GetScriptParams(thread, 1); // skip unhandled param - std::string err(MAX_STR_LEN, '\0'); - sprintf(err.data(), "Outputing string into invalid argument type (%02X) in script '%s'", paramType, reinterpret_cast(thread)->GetScriptFileName()); - Error(err.data()); + SHOW_ERROR("Outputing string into invalid argument type (%02X) in script %s", paramType, ((CCustomScript*)thread)->GetInfoStr().c_str()); return false; } } @@ -576,6 +651,8 @@ namespace CLEO { // perform 'sprintf'-operation for parameters, passed through SCM int ReadFormattedString(CRunningScript *thread, char *outputStr, size_t len, const char *format) { + memset(outputStr, 0, len); + unsigned int written = 0; const char *iter = format; char bufa[256], fmtbufa[64], *fmta; @@ -724,18 +801,14 @@ namespace CLEO { if (written >= len) { - std::string err(MAX_STR_LEN, '\0'); - 
sprintf(err.data(), "Error while formatting string in script '%s'. Output buffer is too short!", reinterpret_cast(thread)->GetScriptFileName()); - Error(err.data()); + LOG_WARNING("Read formatted string error: Insufficient output buffer size in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); return -1; } // still more var-args available if (CLEO_GetOperandType(thread) != DT_END) { - std::string err(MAX_STR_LEN, '\0'); - sprintf(err.data(), "Error while formatting string in script '%s'. More arguments than slots in specified format!", reinterpret_cast(thread)->GetScriptFileName()); - Error(err.data()); + LOG_WARNING("Read formatted string: Found more params than format slots in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); } SkipUnusedParameters(thread); // skip terminator too @@ -743,9 +816,9 @@ namespace CLEO { return (int)written; ReadFormattedString_ArgMissing: - std::string err(MAX_STR_LEN, '\0'); - sprintf(err.data(), "Error while formatting string in script '%s'. Not enough arguments to match specified format!", reinterpret_cast(thread)->GetScriptFileName()); - Error(err.data()); + thread->IncPtr(); // skip vararg terminator + LOG_WARNING("Read formatted string: Not enough arguments to fulfill specified format in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); + CCustomOpcodeSystem::ErrorSuspendScript(thread); return (int)written; } @@ -919,48 +992,6 @@ namespace CLEO { else fflush(convert_handle_to_file(dwHandle)); } - /*inline void __impl_RetrieveScriptParam(SCRIPT_VAR*) { } - template inline void __impl_RetrieveScriptParam(SCRIPT_VAR *, unsigned&, Params&...); - template inline void __impl_RetrieveScriptParam(SCRIPT_VAR *, int&, Params&...); - template inline void __impl_RetrieveScriptParam(SCRIPT_VAR *, float&, Params&...); - template inline void __impl_RetrieveScriptParam(SCRIPT_VAR *, ThisParam *&, Params&...); - - template - inline void __impl_RetrieveScriptParam(SCRIPT_VAR *var, unsigned& thisParam, Params&... 
restParams) - { - thisParam = var->dwParam; - __impl_RetrieveScriptParam(var + 1, restParams...); - } - - template - inline void __impl_RetrieveScriptParam(SCRIPT_VAR *var, int& thisParam, Params&... restParams) - { - thisParam = var->nParam; - __impl_RetrieveScriptParam(var + 1, restParams...); - } - - template - inline void __impl_RetrieveScriptParam(SCRIPT_VAR *var, float& thisParam, Params&... restParams) - { - thisParam = var->fParam; - __impl_RetrieveScriptParam(var + 1, restParams...); - } - - template - inline void __impl_RetrieveScriptParam(SCRIPT_VAR *var, ThisParam *& thisParam, Params&... restParams) - { - thisParam = reinterpret_cast(var->pParam); - __impl_RetrieveScriptParam(var + 1, restParams...); - } - - - template - inline void RetrieveScriptParams(CCustomScript *thread, Params&... params) - { - GetScriptParams(thread, sizeof...(params)); - __impl_RetrieveScriptParam(opcodeParams, params...); - }*/ - inline void ThreadJump(CRunningScript *thread, int off) { thread->SetIp(off < 0 ? 
thread->GetBasePointer() - off : scmBlock + off); @@ -999,7 +1030,11 @@ namespace CLEO { while (Store[allocationPlace]) // find first unused position in store { if (++allocationPlace >= store_size) allocationPlace = 0; // end of store reached - if (allocationPlace == start_search) throw std::bad_alloc(); // the store is filled up + if (allocationPlace == start_search) + { + SHOW_ERROR("CLEO function storage stack overflow!"); + throw std::bad_alloc(); // the store is filled up + } } ScmFunction *obj = reinterpret_cast(::operator new(size)); Store[allocationPlace] = obj; @@ -1144,7 +1179,7 @@ namespace CLEO { GetInstance().CodeInjector.MemoryRead(Address, (DWORD)opcodeParams[0].dwParam, vp); break; default: - TRACE("[0A8D] Unallowed size %u", size); + SHOW_ERROR("Invalid size param (%d) of opcode [0A8D])", size); } SetScriptParams(thread, 1); @@ -1199,12 +1234,13 @@ namespace CLEO { { GetInstance().ScriptEngine.AddCustomScript(cs); TransmitScriptParams(thread, cs); + cs->SetDebugMode(reinterpret_cast(thread)->GetDebugMode()); } else { if (cs) delete cs; SkipUnusedParameters(thread); - TRACE("[0A92] Failed to load script '%s' from script '%s'.", filename.c_str(), thread->GetName()); + LOG_WARNING("Failed to load script '%s' in script %s", filename.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); // one conversion per argument: the caller's info string was previously dropped } return OR_CONTINUE; @@ -1216,7 +1252,7 @@ namespace CLEO { CCustomScript *cs = reinterpret_cast(thread); if (thread->IsMission() || !cs->IsCustom()) { - TRACE("[0A93] Incorrect usage of opcode in script '%s'", thread->GetName()); + LOG_WARNING("[0A93] Incorrect usage of opcode in script '%s'", ((CCustomScript*)thread)->GetInfoStr().c_str()); return OR_CONTINUE; } GetInstance().ScriptEngine.RemoveCustomScript(cs); @@ -1239,12 +1275,13 @@ namespace CLEO { cs->SetCompatibility(csscript->GetCompatibility()); GetInstance().ScriptEngine.AddCustomScript(cs); TransmitScriptParams(thread, (CRunningScript*)((BYTE*)missionLocals - 0x3C)); +
cs->SetDebugMode(reinterpret_cast(thread)->GetDebugMode()); } else { if (cs) delete cs; SkipUnusedParameters(thread); - TRACE("[0A94] Failed to load mission '%s' from script '%s'.", filename.c_str(), thread->GetName()); + LOG_WARNING("[0A94] Failed to load mission '%s' from script '%s'.", filename.c_str(), thread->GetName()); } return OR_CONTINUE; @@ -1757,7 +1794,7 @@ namespace CLEO { case 2: stream->Pause(); break; case 3: stream->Resume(); break; default: - TRACE("[0AAD] Unknown audiostream's action: %d", action); + LOG_WARNING("[0AAD] Unknown audiostream's action (%d) in script %s", action, ((CCustomScript*)thread)->GetInfoStr().c_str()); } } return OR_CONTINUE; @@ -1826,10 +1863,8 @@ namespace CLEO { default: { - std::string err(128, '\0'); - sprintf(err.data(), "Invalid first argument type (%02X) of 0AB1 opcode in script '%s'", *thread->GetBytePointer(), reinterpret_cast(thread)->GetScriptFileName()); - Error(err.data()); - return OR_INTERRUPT; + SHOW_ERROR("Invalid first argument type (%02X) of [0AB1] opcode in script '%s' \nScript suspended.", *thread->GetBytePointer(), ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); } } @@ -1842,10 +1877,8 @@ namespace CLEO { auto pos = str.find('@'); if (pos == str.npos) { - std::string err(128, '\0'); - sprintf(err.data(), "Invalid module reference '%s' in 0AB1 opcode in script '%s'", moduleTxt, reinterpret_cast(thread)->GetScriptFileName()); - Error(err.data()); - return OR_INTERRUPT; + SHOW_ERROR("Invalid module reference '%s' in 0AB1 opcode in script '%s' \nScript suspended.", moduleTxt, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); } std::string_view strExport = str.substr(0, pos); std::string_view strModule = str.substr(pos + 1); @@ -1858,10 +1891,8 @@ namespace CLEO { auto scriptRef = GetInstance().ModuleSystem.GetExport(modulePath, strExport); if (!scriptRef.Valid()) { - std::string err(128, 
'\0'); - sprintf(err.data(), "Not found module '%s' export '%s', requested by 0AB1 opcode in script '%s'", modulePath.c_str(), &str[0], reinterpret_cast(thread)->GetScriptFileName()); - Error(err.data()); - return OR_INTERRUPT; + SHOW_ERROR("Not found module '%s' export '%s', requested by 0AB1 opcode in script '%s'", modulePath.c_str(), &str[0], ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); } scmFunc->moduleExportRef = scriptRef.base; // to be released on return @@ -2772,18 +2803,7 @@ extern "C" BOOL WINAPI CLEO_RegisterOpcode(WORD opcode, CustomOpcodeHandler callback) { - if ((opcode > 0x7FFF) || (opcode < 0x0AAB)) - return FALSE; - - CustomOpcodeHandler& dst = extraOpcodeHandlers[opcode % 100][opcode / 100 - 28]; - - if (*dst) - { - Error("Warning! CLEO couldn't register opcode handler."); - return FALSE; - } - dst = callback; - return TRUE; + return CCustomOpcodeSystem::RegisterOpcode(opcode, callback); } #ifdef _MSC_VER @@ -2828,11 +2848,26 @@ extern "C" return ReadStringParam(thread, buf, size); } - void WINAPI CLEO_WriteStringOpcodeParam(CLEO::CRunningScript* thread, LPCSTR str) + void WINAPI CLEO_WriteStringOpcodeParam(CLEO::CRunningScript* thread, const char* str) { WriteStringParam(thread, str); } + char* WINAPI CLEO_ReadParamsFormatted(CLEO::CRunningScript* thread, const char* format, char* buf, int size) + { + static char internal_buf[MAX_STR_LEN * 4]; + if (!buf) { buf = internal_buf; size = sizeof(internal_buf); } + if (!size) size = MAX_STR_LEN; + std::fill(buf, buf + size, '\0'); + + if(format != nullptr && strlen(format) > 0) + ReadFormattedString(thread, buf, size, format); + else + SkipUnusedParameters(thread); + + return buf; + } + void WINAPI CLEO_SetThreadCondResult(CLEO::CRunningScript* thread, BOOL result) { SetScriptCondResult(thread, result != FALSE); @@ -2940,12 +2975,16 @@ extern "C" { GetInstance().ScriptEngine.AddCustomScript(cs); if (fromThread) 
TransmitScriptParams(fromThread, cs); + + cs->SetDebugMode(fromThread ? + reinterpret_cast(fromThread)->GetDebugMode() : // from parent + GetInstance().ScriptEngine.NativeScriptsDebugMode); // global } else { if (cs) delete cs; if (fromThread) SkipUnusedParameters(fromThread); - TRACE("Failed to load script '%s'.", script_name); + LOG_WARNING("Failed to load script '%s'.", script_name); return nullptr; } @@ -2969,7 +3008,7 @@ extern "C" void WINAPI CLEO_ResolvePath(CLEO::CRunningScript* thread, char* inOutPath, DWORD pathMaxLen) { - if (thread == nullptr || inOutPath == nullptr || pathMaxLen < 1) + if (thread == nullptr || inOutPath == nullptr || pathMaxLen < 2) { return; // invalid param } @@ -2981,4 +3020,29 @@ extern "C" std::memcpy(inOutPath, resolved.c_str(), resolved.length() + 1); // with terminator } + + BOOL WINAPI CLEO_GetScriptDebugMode(const CLEO::CRunningScript* thread) + { + return reinterpret_cast(thread)->GetDebugMode(); + } + + void WINAPI CLEO_SetScriptDebugMode(CLEO::CRunningScript* thread, BOOL enabled) + { + reinterpret_cast(thread)->SetDebugMode(enabled); + } + + void WINAPI CLEO_GetScriptInfoStr(CLEO::CRunningScript* thread, bool currLineInfo, char* buf, DWORD bufSize) + { + if (thread == nullptr || buf == nullptr || bufSize < 2) + { + return; // invalid param + } + + auto text = reinterpret_cast(thread)->GetInfoStr(currLineInfo); + + if (text.length() >= bufSize) + text.resize(bufSize - 1); // and terminator character + + std::memcpy(buf, text.c_str(), text.length() + 1); // with terminator + } } \ No newline at end of file diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index b560cdc1..9f60279e 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -18,6 +18,24 @@ namespace CLEO class CCustomOpcodeSystem : public VInjectible { + public: + static const size_t LastOriginalOpcode = 0x0A4E; // GTA SA + static const size_t LastCustomOpcode = 0x7FFF; + + void FinalizeScriptObjects(); + + 
CCustomOpcodeSystem(); + virtual void Inject(CCodeInjector& inj); + ~CCustomOpcodeSystem() + { + //TRACE("Last opcode executed %04X at %s:%d", last_opcode, last_thread, last_off); + } + + static bool RegisterOpcode(WORD opcode, CustomOpcodeHandler callback); + + static OpcodeResult ErrorSuspendScript(CRunningScript* thread); // suspend script execution forever + + private: friend OpcodeResult __stdcall opcode_0A9A(CRunningScript *pScript); friend OpcodeResult __stdcall opcode_0A9B(CRunningScript *pScript); friend OpcodeResult __stdcall opcode_0AA2(CRunningScript *pScript); @@ -25,18 +43,21 @@ namespace CLEO friend OpcodeResult __stdcall opcode_0AC8(CRunningScript *pScript); friend OpcodeResult __stdcall opcode_0AC9(CRunningScript *pScript); - public: std::set m_hFiles; std::set m_hNativeLibs; - std::set m_pAllocations; + std::set m_pAllocations; - void FinalizeScriptObjects(); + typedef OpcodeResult(__thiscall* _OpcodeHandler)(CRunningScript* thread, WORD opcode); - virtual void Inject(CCodeInjector& inj); - ~CCustomOpcodeSystem() - { - //TRACE("Last opcode executed %04X at %s:%d", last_opcode, last_thread, last_off); - } + static const size_t OriginalOpcodeHandlersCount = (LastOriginalOpcode / 100) + 1; // 100 opcodes peer handler + static _OpcodeHandler originalOpcodeHandlers[OriginalOpcodeHandlersCount]; // backuped when patching + + static const size_t CustomOpcodeHandlersCount = (LastCustomOpcode / 100) + 1; // 100 opcodes peer handler + static _OpcodeHandler customOpcodeHandlers[CustomOpcodeHandlersCount]; // original + new opcodes + + static OpcodeResult __fastcall customOpcodeHandler(CRunningScript* thread, int dummy, WORD opcode); // universal CLEO's opcode handler + + static CustomOpcodeHandler customOpcodeProc[LastCustomOpcode + 1]; // procedure for each opcode }; extern void(__thiscall * ProcessScript)(CRunningScript*); diff --git a/source/CDebug.cpp b/source/CDebug.cpp index 12eacc56..00c73f36 100644 --- a/source/CDebug.cpp +++ b/source/CDebug.cpp @@ 
-1,18 +1,76 @@ #include "stdafx.h" #include "CDebug.h" +#include "CleoBase.h" CDebug Debug; +using namespace CLEO; -void Error(const char *szStr) +void CDebug::Trace(eLogLevel level, const char* format, ...) { - MessageBox(nullptr, szStr, "CLEO error", MB_ICONERROR | MB_OK); - TRACE("[Error] Exiting with error: %s", szStr); - //exit(1); + va_list args; + va_start(args, format); + TraceVArg(level, format, args); + va_end(args); } -void Warning(const char *szStr) +const char* CDebug::TraceVArg(CLEO::eLogLevel level, const char* format, va_list args) { - MessageBox(nullptr, szStr, "CLEO warning", MB_ICONWARNING | MB_OK); - TRACE("[Warning] %s", szStr); - //exit(1); + std::lock_guard guard(mutex); + + static char szBuf[1024]; + + // time stamp + SYSTEMTIME t; + GetLocalTime(&t); + sprintf(szBuf, "%02d/%02d/%04d %02d:%02d:%02d.%03d ", t.wDay, t.wMonth, t.wYear, t.wHour, t.wMinute, t.wSecond, t.wMilliseconds); + char* stampEnd = szBuf + strlen(szBuf); + + // put params into format + vsnprintf(stampEnd, sizeof(szBuf) - (size_t)(stampEnd - szBuf), format, args); // bounded by the space left after the time stamp: over-long messages are truncated instead of overrunning szBuf + + // output to file + if(m_hFile.good()) + m_hFile << szBuf << std::endl; + + // output to console +#ifdef _DEBUG + OutputDebugString(szBuf); + OutputDebugString("\n"); +#endif + + // output to callbacks + auto& cleo = GetInstance(); + if (cleo.IsStarted()) + { + for (void* func : cleo.GetCallbacks(eCallbackId::Log)) + { + typedef void WINAPI callback(eLogLevel, const char*); + ((callback*)func)(level, stampEnd); + } + } + + return stampEnd; +} + +void CDebug::Error(const char* format, ...) 
+{ + va_list args; + va_start(args, format); + auto msg = TraceVArg(eLogLevel::Error, format, args); + va_end(args); + + auto mainWnd = GetInstance().MainWnd; + PostMessage(mainWnd, WM_SYSCOMMAND, SC_MINIMIZE, 0); + ShowWindow(mainWnd, SW_MINIMIZE); + MessageBox(mainWnd, msg, "CLEO error", MB_SYSTEMMODAL | MB_TOPMOST | MB_ICONERROR | MB_OK); + PostMessage(mainWnd, WM_SYSCOMMAND, SC_RESTORE, 0); + ShowWindow(mainWnd, SW_RESTORE); +} + +extern "C" +{ + void WINAPI CLEO_Log(eLogLevel level, const char* msg) + { + Debug.Trace(level, "%s", msg); + } } diff --git a/source/CDebug.h b/source/CDebug.h index f4ed0858..e2129132 100644 --- a/source/CDebug.h +++ b/source/CDebug.h @@ -1,62 +1,38 @@ #pragma once #include -#define TRACE __noop - -#ifdef DEBUGIT -#undef TRACE -#define TRACE(a,...) {Debug.Trace(a, __VA_ARGS__);} -#endif +#define TRACE(a,...) {Debug.Trace(CLEO::eLogLevel::Default, a, __VA_ARGS__);} +#define LOG_WARNING(a,...) {Debug.Trace(CLEO::eLogLevel::Error, a, __VA_ARGS__);} +#define SHOW_ERROR(a,...) {Debug.Error(a, __VA_ARGS__);} const char szLogFileName[] = "cleo.log"; class CDebug { - std::mutex mutex; - -#ifdef DEBUGIT - std::ofstream m_hFile; -#endif - public: -#ifdef DEBUGIT - CDebug() : m_hFile(szLogFileName) { - Trace("Log started."); + Trace(CLEO::eLogLevel::Default, "Log started."); #ifdef _DEBUG - Trace("CLEO v%s DEBUG", CLEO_VERSION_STR); + Trace(CLEO::eLogLevel::Default, "CLEO v%s DEBUG", CLEO_VERSION_STR); #else - Trace("CLEO v%s", CLEO_VERSION_STR); + Trace(CLEO::eLogLevel::Default, "CLEO v%s", CLEO_VERSION_STR); #endif } ~CDebug() { - Trace("Log finished."); + Trace(CLEO::eLogLevel::Default, "Log finished."); } - - void Trace(const char *format, ...) 
- { - std::lock_guard guard(mutex); - - SYSTEMTIME t; - static char szBuf[1024]; - - GetLocalTime(&t); - sprintf(szBuf, "%02d/%02d/%04d %02d:%02d:%02d.%03d ", t.wDay, t.wMonth, t.wYear, t.wHour, t.wMinute, t.wSecond, t.wMilliseconds); - va_list arg; - va_start(arg, format); - vsprintf(szBuf + strlen(szBuf), format, arg); - va_end(arg); - m_hFile << szBuf << std::endl; - OutputDebugString(szBuf); - OutputDebugString("\n"); - } -#endif + + void Trace(CLEO::eLogLevel level, const char* format, ...); + void Error(const char* format, ...); + +private: + std::mutex mutex; + std::ofstream m_hFile; + const char* TraceVArg(CLEO::eLogLevel level, const char* format, va_list args); }; extern CDebug Debug; -void Warning(const char *); -void Error(const char *); diff --git a/source/CDmaFix.cpp b/source/CDmaFix.cpp index 0e378833..d09d044e 100644 --- a/source/CDmaFix.cpp +++ b/source/CDmaFix.cpp @@ -44,7 +44,7 @@ namespace CLEO inj.Nop(0x4698F6, 3); break; default: - Error("CDmaFix::Inject(): Unimplemented game version."); + SHOW_ERROR("CDmaFix::Inject(): Unimplemented game version."); } } } diff --git a/source/CGameMenu.cpp b/source/CGameMenu.cpp index e03daa95..0badc8f3 100644 --- a/source/CGameMenu.cpp +++ b/source/CGameMenu.cpp @@ -91,6 +91,17 @@ namespace CLEO SetLetterColor(RGBA(/*0xE1, 0xE1, 0xE1, 0xFF*/0xAD, 0xCE, 0xC4, 0xFF)); TextDraw(CGameMenu_ScaleX(MenuManager, 6.0f), CGameMenu_ScaleY(MenuManager, 436.0f), cleo_text.str().c_str()); } + + // execute callbacks + auto& cleo = GetInstance(); + if (cleo.IsStarted()) + { + for (void* func : cleo.GetCallbacks(eCallbackId::MenuDraw)) + { + typedef void WINAPI callback(void); + ((callback*)func)(); + } + } } void CGameMenu::Inject(CCodeInjector& inj) diff --git a/source/CModuleSystem.cpp b/source/CModuleSystem.cpp index 5fd539f2..c66ddbbe 100644 --- a/source/CModuleSystem.cpp +++ b/source/CModuleSystem.cpp @@ -71,7 +71,8 @@ bool CModuleSystem::LoadDirectory(const char* path) bool CModuleSystem::LoadCleoModules() { 
std::string path = CFileMgr::ms_rootDirName; - path += "\\cleo\\cleo_modules"; + if (!path.empty() && path.back() != '\\') path.push_back('\\'); + path += "cleo\\cleo_modules"; return LoadDirectory(path.c_str()); } @@ -153,7 +154,7 @@ void CModuleSystem::CModule::Update() auto file = filepath; auto result = LoadFromFile(file.c_str()); updateNeeded = false; - TRACE("Module reload %s '%s'", result ? "OK" : "FAILED", file.c_str()); + Debug.Trace(eLogLevel::Debug, "Module reload %s '%s'", result ? "OK" : "FAILED", file.c_str()); } } @@ -210,7 +211,7 @@ bool CModuleSystem::CModule::LoadFromFile(const char* path) std::ifstream file(path, std::ios::binary); if (!file.good()) { - TRACE("Failed to open module file '%s'", path); + LOG_WARNING("Failed to open module file '%s'", path); return false; } @@ -235,7 +236,7 @@ bool CModuleSystem::CModule::LoadFromFile(const char* path) file.read((char*)&segment, sizeof(segment)); if (file.fail()) { - TRACE("Module '%s' file header read error", path); + LOG_WARNING("Module '%s' file header read error", path); return false; } @@ -244,7 +245,7 @@ bool CModuleSystem::CModule::LoadFromFile(const char* path) segment.jumpAddress >= 0 || // jump labels should be negative values std::memcmp(segment.magic, Segment_Magic, sizeof(Segment_Magic)) != 0) // not a custom header { - TRACE("Module '%s' load error. Custom segment not present", path); + LOG_WARNING("Module '%s' load error. Custom segment not present", path); return false; } segment.jumpAddress = abs(segment.jumpAddress); // turn label into actual file offset @@ -265,7 +266,7 @@ bool CModuleSystem::CModule::LoadFromFile(const char* path) if (file.fail() || file.tellg() > segment.jumpAddress) // read past the segment end { - TRACE("Module '%s' load error. Invalid custom header", path); + LOG_WARNING("Module '%s' load error. 
Invalid custom header", path); return false; } @@ -277,7 +278,7 @@ bool CModuleSystem::CModule::LoadFromFile(const char* path) { if (headerEndPos > segment.jumpAddress) { - TRACE("Module '%s' load error. Invalid size of exports header", path); + LOG_WARNING("Module '%s' load error. Invalid size of exports header", path); return false; } @@ -291,11 +292,11 @@ bool CModuleSystem::CModule::LoadFromFile(const char* path) { if (e.name.empty()) { - TRACE("Module '%s' export load error.", path); + LOG_WARNING("Module '%s' export load error.", path); } else { - TRACE("Module's '%s' export '%s' load error.", path, e.name.c_str()); + LOG_WARNING("Module's '%s' export '%s' load error.", path, e.name.c_str()); } return false; } @@ -314,7 +315,7 @@ bool CModuleSystem::CModule::LoadFromFile(const char* path) file.seekg(headerEndPos, file.beg); if (file.fail()) { - TRACE("Module '%s' load error. Error while skipping unknown header type", path); + LOG_WARNING("Module '%s' load error. Error while skipping unknown header type", path); return false; } } @@ -322,13 +323,13 @@ bool CModuleSystem::CModule::LoadFromFile(const char* path) if (!file.good()) { - TRACE("Module '%s' read error", path); + LOG_WARNING("Module '%s' read error", path); return false; } if (!result) // no usable elements found. No point to keeping this module { - TRACE("Module '%s' skipped. Nothing found", path); + LOG_WARNING("Module '%s' skipped. 
Nothing found", path); return false; } diff --git a/source/CPluginSystem.h b/source/CPluginSystem.h index f69bd7b3..3940a836 100644 --- a/source/CPluginSystem.h +++ b/source/CPluginSystem.h @@ -17,13 +17,11 @@ namespace CLEO TRACE("Loading plugins..."); FilesWalk("cleo\\cleo_plugins", ".cleo", [this](const char *filename) { - TRACE("Loading plugin %s", filename); + TRACE("Loading plugin '%s'", filename); HMODULE hlib = LoadLibrary(filename); if (!hlib) { - char message[MAX_PATH + 40]; - sprintf(message, "Error loading plugin %s", filename); - Warning(message); + LOG_WARNING("Error loading plugin '%s'", filename); } else plugins.push_back(hlib); }); diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 997d3a2c..88e64919 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -4,6 +4,7 @@ #include "CGame.h" #include +#include namespace CLEO { @@ -215,6 +216,8 @@ namespace CLEO GetInstance().TextManager.ClearDynamicFxts(); GetInstance().OpcodeSystem.FinalizeScriptObjects(); GetInstance().SoundSystem.UnloadAllStreams(); + + GetInstance().ScriptEngine.Initialize(); GetInstance().ScriptEngine.LoadCustomScripts(); for (void* func : GetInstance().GetCallbacks(eCallbackId::ScmInit2)) @@ -233,6 +236,8 @@ namespace CLEO GetInstance().TextManager.ClearDynamicFxts(); GetInstance().OpcodeSystem.FinalizeScriptObjects(); GetInstance().SoundSystem.UnloadAllStreams(); + + GetInstance().ScriptEngine.Initialize(); GetInstance().ScriptEngine.LoadCustomScripts(true); for (void* func : GetInstance().GetCallbacks(eCallbackId::ScmInit3)) @@ -585,7 +590,23 @@ namespace CLEO *useTextCommands = UseTextCommands; } - const char* CCustomScript::GetScriptFileDir() const + bool CCustomScript::GetDebugMode() const + { + if (!bIsCustom) + return GetInstance().ScriptEngine.NativeScriptsDebugMode; + + return bDebugMode; + } + + void CCustomScript::SetDebugMode(bool enabled) + { + if (!bIsCustom) + GetInstance().ScriptEngine.NativeScriptsDebugMode = enabled; + else + 
bDebugMode = enabled; + } + + const char* CCustomScript::GetScriptFileDir() const { if(!bIsCustom) return GetInstance().ScriptEngine.MainScriptFileDir.c_str(); @@ -644,7 +665,7 @@ namespace CLEO if (strlen(path) < 2 || path[1] != ':') // does not start with drive letter { result = (customWorkDir != nullptr) ? customWorkDir : GetWorkDir(); - result.push_back('\\'); + if (!result.empty() && result.back() != '\\') result.push_back('\\'); result += path; } else @@ -666,11 +687,14 @@ namespace CLEO if (result[0] == DIR_SCRIPT[0]) // current script location { - return std::string(GetScriptFileDir()) + &result[2]; // original path without '2:' prefix; + std::string resolved = ResolvePath(GetScriptFileDir()); + resolved += &result[2]; // original path without '2:' prefix; + return resolved; } // game root directory std::string resolved = CFileMgr::ms_rootDirName; + if(!resolved.empty() && resolved.back() == '\\') resolved.pop_back(); if (result[0] == DIR_CLEO[0]) // cleo directory { @@ -685,6 +709,38 @@ namespace CLEO return resolved; } + std::string CCustomScript::GetInfoStr(bool currLineInfo) const + { + std::ostringstream ss; + + auto threadName = GetName(); + auto fileName = GetScriptFileName(); + + if(memcmp(threadName, fileName, strlen(threadName)) != 0) // thread name no longer same as filename (was set with 03A4) + { + ss << "'" << threadName << "' from "; + } + + ss << "'" << fileName << "'"; + + if(currLineInfo) + { + ss << " at "; + + if(false) + { + // TODO: get Sanny's SMC extra info + } + else + { + auto address = (DWORD)CurrentIP - (DWORD)BaseIP; + ss << "0x" << std::hex << std::uppercase << /*std::setw(4) << std::setfill('0') <<*/ address; + } + } + + return ss.str(); + } + void CCustomScript::StoreScriptTextures() { // store this scripts textures + restore SCM textures + make sure this scripts textures arent cleared by another @@ -848,6 +904,7 @@ namespace CLEO MainScriptFileName = "scr.scm"; } + NativeScriptsDebugMode = GetPrivateProfileInt("General", 
"DebugMode", 0, GetInstance().ConfigFilename.c_str()) != 0; MainScriptCurWorkDir = DIR_GAME; } @@ -909,12 +966,14 @@ namespace CLEO // [game root]\cleo std::string scriptsDir = CFileMgr::ms_rootDirName; - scriptsDir += "\\cleo"; + if (!scriptsDir.empty() && scriptsDir.back() != '\\') scriptsDir.push_back('\\'); + scriptsDir += "cleo"; TRACE("Searching for cleo scripts"); FilesWalk(scriptsDir.c_str(), cs_ext, [this](const char *filename) { - LoadScript(filename); + auto cs = LoadScript(filename); + if (cs != nullptr) cs->SetDebugMode(NativeScriptsDebugMode); // inherit from global state; skipped when the script failed to load (null result) }); FilesWalk(scriptsDir.c_str(), cs4_ext, [this](const char *filename) { @@ -945,6 +1004,8 @@ namespace CLEO return nullptr; } + cs->SetDebugMode(NativeScriptsDebugMode); // inherit from global state + // check whether the script is in stop-list if (stopped_info) { @@ -1211,7 +1272,7 @@ namespace CLEO is.exceptions(std::ios::badbit | std::ios::failbit); std::size_t length; is.seekg(0, std::ios::end); - length = is.tellg(); + length = (size_t)is.tellg(); is.seekg(0, std::ios::beg); if (bIsMiss) @@ -1238,11 +1299,11 @@ namespace CLEO } catch (std::exception& e) { - TRACE("Error during loading of custom script %s occured.\nError message: %s", szFileName, e.what()); + LOG_WARNING("Error during loading of custom script %s occured.\nError message: %s", szFileName, e.what()); } catch (...) 
{ - TRACE("Unknown error during loading of custom script %s occured.", szFileName); + LOG_WARNING("Unknown error during loading of custom script %s occured.", szFileName); } } diff --git a/source/CScriptEngine.h b/source/CScriptEngine.h index 6218fec1..942041db 100644 --- a/source/CScriptEngine.h +++ b/source/CScriptEngine.h @@ -11,6 +11,7 @@ namespace CLEO class CCustomScript : public CRunningScript { friend class CScriptEngine; + friend class CCustomOpcodeSystem; friend struct ScmFunction; friend struct ThreadSavingInfo; @@ -28,6 +29,8 @@ namespace CLEO std::vector script_draws; std::vector script_texts; + bool bDebugMode; // debug mode enabled? + std::string scriptFileDir; std::string scriptFileName; std::string workDir; @@ -68,6 +71,10 @@ namespace CLEO void StoreScriptCustoms(); void RestoreScriptCustoms(); + // debug related utils enabled? + bool GetDebugMode() const; + void SetDebugMode(bool enabled); + // absolute path to directory where script's source file is located const char* GetScriptFileDir() const; void SetScriptFileDir(const char* directory); @@ -82,6 +89,9 @@ namespace CLEO // create absolute file path std::string ResolvePath(const char* path, const char* customWorkDir = nullptr) const; + + // get short info text about script + std::string GetInfoStr(bool currLineInfo = true) const; }; class CScriptEngine : VInjectible @@ -94,6 +104,7 @@ namespace CLEO CCustomScript *LoadScript(const char *szFilePath); public: + bool NativeScriptsDebugMode; // debug mode enabled? 
std::string MainScriptFileDir; std::string MainScriptFileName; std::string MainScriptCurWorkDir; diff --git a/source/CSoundSystem.cpp b/source/CSoundSystem.cpp index 0a19bfdb..4a9e8677 100644 --- a/source/CSoundSystem.cpp +++ b/source/CSoundSystem.cpp @@ -12,11 +12,13 @@ namespace CLEO HWND OnCreateMainWindow(HINSTANCE hinst) { - if (HIWORD(BASS_GetVersion()) != BASSVERSION) Error("An incorrect version of bass.dll has been loaded"); + if (HIWORD(BASS_GetVersion()) != BASSVERSION) LOG_WARNING("An incorrect version of bass.dll has been loaded"); TRACE("Creating main window..."); - HWND wnd = CreateMainWindow(hinst); - if (!GetInstance().SoundSystem.Init(wnd)) TRACE("CSoundSystem::Init() failed. Error code: %d", BASS_ErrorGetCode()); - return wnd; + auto mainWnd = CreateMainWindow(hinst); + if (!GetInstance().SoundSystem.Init(mainWnd)) SHOW_ERROR("CSoundSystem::Init() failed. Error code: %d", BASS_ErrorGetCode()); + + GetInstance().MainWnd = mainWnd; + return mainWnd; } CPlaceable *camera; @@ -127,7 +129,7 @@ namespace CLEO BASS_Apply3D(); return true; } - Warning("Could not initialize BASS sound system"); + LOG_WARNING("Could not initialize BASS sound system"); return false; } @@ -231,7 +233,7 @@ namespace CLEO if (!(streamInternal = BASS_StreamCreateFile(FALSE, src, 0, 0, flags)) && !(streamInternal = BASS_StreamCreateURL(src, 0, flags, 0, nullptr))) { - TRACE("Loading audiostream %s failed. Error code: %d", src, BASS_ErrorGetCode()); + LOG_WARNING("Loading audiostream %s failed. Error code: %d", src, BASS_ErrorGetCode()); } else OK = true; } @@ -249,7 +251,7 @@ namespace CLEO if (!(streamInternal = BASS_StreamCreateFile(FALSE, src, 0, 0, flags)) && !(streamInternal = BASS_StreamCreateURL(src, 0, flags, nullptr, nullptr))) { - TRACE("Loading 3d-audiostream %s failed. Error code: %d", src, BASS_ErrorGetCode()); + LOG_WARNING("Loading 3d-audiostream %s failed. 
Error code: %d", src, BASS_ErrorGetCode()); } else { diff --git a/source/CTextManager.cpp b/source/CTextManager.cpp index ac8e79c2..c4ed702e 100644 --- a/source/CTextManager.cpp +++ b/source/CTextManager.cpp @@ -114,10 +114,7 @@ namespace CLEO } catch (std::exception& ex) { - std::ostringstream ss; - ss << "Loading of FXT file " << fname << " failed\n"; - ss << ex.what(); - Warning(ss.str().c_str()); + LOG_WARNING("Loading of FXT file '%s' failed: \n%s", fname, ex.what()); } }); } @@ -139,7 +136,7 @@ namespace CLEO { if (!dynamic || fxt->second->is_static) { - TRACE("Attempting to add FXT \'%s\' - FAILED (GXT conflict)", key, value); + LOG_WARNING("Attempting to add FXT \'%s\' - FAILED (GXT conflict)", key, value); return false; } diff --git a/source/CleoBase.cpp b/source/CleoBase.cpp index 50b5d779..9429affa 100644 --- a/source/CleoBase.cpp +++ b/source/CleoBase.cpp @@ -1,5 +1,6 @@ #include "stdafx.h" #include "CleoBase.h" +#include namespace CLEO @@ -28,6 +29,10 @@ namespace CLEO void CCleoInstance::Start() { + if (m_bStarted) return; // already started + + ConfigFilename = std::filesystem::current_path().append("cleo\\.cleo_config.ini").string(); + CreateDirectory("cleo", NULL); CreateDirectory("cleo/cleo_modules", NULL); CreateDirectory("cleo/cleo_saves", NULL); @@ -41,11 +46,15 @@ namespace CLEO SoundSystem.Inject(CodeInjector); OpcodeSystem.Inject(CodeInjector); ScriptEngine.Inject(CodeInjector); + + m_bStarted = true; } void CCleoInstance::Stop() { if (!m_bStarted) return; + + m_bStarted = false; } void CCleoInstance::AddCallback(eCallbackId id, void* func) diff --git a/source/CleoBase.h b/source/CleoBase.h index c28ac0df..60c675f5 100644 --- a/source/CleoBase.h +++ b/source/CleoBase.h @@ -34,12 +34,17 @@ namespace CLEO CPluginSystem PluginSystem; //CLegacy Legacy; + HWND MainWnd; + std::string ConfigFilename; + CCleoInstance(); virtual ~CCleoInstance(); void Start(); void Stop(); + bool IsStarted() const { return m_bStarted; } + void AddCallback(eCallbackId 
id, void* func); const std::set& GetCallbacks(eCallbackId id); diff --git a/source/cleo.def b/source/cleo.def index 7fd4dfc4..394aad2f 100644 --- a/source/cleo.def +++ b/source/cleo.def @@ -30,4 +30,8 @@ EXPORTS _CLEO_ResolvePath@12 @27 _CLEO_GetScriptVersion@4 @28 _CLEO_RegisterCallback@8 @29 + _CLEO_GetScriptDebugMode@4 @30 + _CLEO_SetScriptDebugMode@8 @31 + _CLEO_Log@8 @32 + _CLEO_ReadParamsFormatted@16 @33 diff --git a/source/cleo_config.ini b/source/cleo_config.ini new file mode 100644 index 00000000..2253e7a7 --- /dev/null +++ b/source/cleo_config.ini @@ -0,0 +1,3 @@ +[General] +; debug opcodes, on screen prints etc.: 0 - off, 1 - enabled +DebugMode=0 diff --git a/source/dllmain.cpp b/source/dllmain.cpp index 14a6f846..abdbd40b 100644 --- a/source/dllmain.cpp +++ b/source/dllmain.cpp @@ -16,7 +16,7 @@ class Starter ""); if (gv != CLEO::GV_US10 && gv != CLEO::GV_EU11 && gv != CLEO::GV_EU10 && gv != CLEO::GV_STEAM) - Error( + SHOW_ERROR( "Unknown game version.\n" "The list of all supported executables:\n\n" " 1) gta_sa.exe, original 1.0 us, 14 405 632 bytes;\n" @@ -33,6 +33,7 @@ class Starter CLEO::GetInstance().Start(); } + ~Starter() { CLEO::GetInstance().Stop(); @@ -43,39 +44,5 @@ Starter Starter::dummy; extern "C" BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) { - /*auto gv = CLEO::GetInstance().VersionManager.GetGameVersion(); - - switch (fdwReason) - { - case DLL_PROCESS_ATTACH: - TRACE("Started on game of version: %s", - (gv == CLEO::GV_US10)? "SA 1.0 us" : - (gv == CLEO::GV_EU11)? "SA 1.01 eu" : - (gv == CLEO::GV_EU10)? "SA 1.0 eu" : - (gv == CLEO::GV_STEAM)? 
"SA 3.0 steam" : - ""); - - if (gv != CLEO::GV_US10 && gv != CLEO::GV_EU11 && gv != CLEO::GV_EU10 && gv != CLEO::GV_STEAM) - Error( - "Unknown game version.\n" - "The list of all supported executables:\n\n" - " 1) gta_sa.exe, original 1.0 us, 14 405 632 bytes;\n" - " 2) gta_sa.exe, public no-dvd 1.0 us, 14 383 616 bytes;\n" - " 3) gta_sa_compact.exe, listener's executable, 5 189 632 bytes;\n" - " 4) gta_sa.exe, original 1.01 eu, 14 405 632 bytes;\n" - " 5) gta_sa.exe, public no-dvd 1.01 eu, 15 806 464 bytes;\n" - " 6) gta_sa.exe, 1C localization, 15 806 464 bytes;\n" - " 7) gta_sa.exe, original 1.0 eu, unknown size;\n" - " 8) gta_sa.exe, public no-dvd 1.0eu, 14 386 176 bytes;\n" - " 9) gta_sa.exe, original 3.0 steam executable, unknown size;" - " 10) gta_sa.exe, decrypted 3.0 steam executable, 5 697 536 bytes." - ); - - CLEO::GetInstance().Start(); - break; - case DLL_PROCESS_DETACH: - CLEO::GetInstance().Stop(); - break; - }*/ return TRUE; } diff --git a/source/stdafx.h b/source/stdafx.h index dd255cda..8526306c 100644 --- a/source/stdafx.h +++ b/source/stdafx.h @@ -1,6 +1,5 @@ #pragma once #pragma warning(disable:4733) -#define DEBUGIT #define _USE_MATH_DEFINES #define WIN32_LEAN_AND_MEAN #undef UNICODE From 22c527a00bed12ea1846e49d55bce1f3ea11570c Mon Sep 17 00:00:00 2001 From: Miran Date: Fri, 13 Oct 2023 03:28:18 +0200 Subject: [PATCH 018/216] Allow replacing CLEO's opcodes. --- source/CCustomOpcodeSystem.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index b65e0e4f..2f533cd6 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -415,8 +415,7 @@ namespace CLEO { CustomOpcodeHandler& dst = customOpcodeProc[opcode]; if (*dst != nullptr) { - LOG_WARNING("Opcode [%04X] already registered! Skipping.", opcode); - return false; + LOG_WARNING("Opcode [%04X] already registered! 
Replacing...", opcode); } dst = callback; From fb4dedacea3a375430d36865f2dff64d5509a935 Mon Sep 17 00:00:00 2001 From: Miran Date: Fri, 13 Oct 2023 18:11:37 +0200 Subject: [PATCH 019/216] fixup! Allow replacing CLEO's opcodes. --- cleo_plugins/DebugUtils/DebugUtils.cpp | 39 +++++++++++++------------- cleo_plugins/DebugUtils/ScreenLog.cpp | 31 +++++++++++--------- source/CCustomOpcodeSystem.cpp | 4 +-- source/CScriptEngine.cpp | 1 + 4 files changed, 38 insertions(+), 37 deletions(-) diff --git a/cleo_plugins/DebugUtils/DebugUtils.cpp b/cleo_plugins/DebugUtils/DebugUtils.cpp index ec7586e9..d62e9f3e 100644 --- a/cleo_plugins/DebugUtils/DebugUtils.cpp +++ b/cleo_plugins/DebugUtils/DebugUtils.cpp @@ -113,7 +113,7 @@ class DebugUtils for (size_t i = 0; i < KeyCount; i++) { auto state = GetKeyState(KeyFirst + i); - if (state & 0x8000) // pressed + if (state & 0x8000) // key down { keysReleased = false; break; @@ -122,31 +122,29 @@ class DebugUtils } else // ready for next press { - for (size_t i = 0; i < pausedScripts.size(); i++) + const size_t count = min(pausedScripts.size(), KeyCount); + for (size_t i = 0; i < count; i++) { - if (keysReleased && i < KeyCount) + auto state = GetKeyState(KeyFirst + i); + if (state & 0x8000) // key down { - auto state = GetKeyState(KeyFirst + i); - if (state & 0x8000) // pressed - { - keysReleased = false; + keysReleased = false; - std::stringstream ss; - ss << "Script breakpoint "; - if (!pausedScripts[i].msg.empty()) ss << "'" << pausedScripts[i].msg << "' "; - ss << "released in '" << pausedScripts[i].ptr->GetName() << "'"; - CLEO_Log(eLogLevel::Debug, ss.str().c_str()); + std::stringstream ss; + ss << "Script breakpoint "; + if (!pausedScripts[i].msg.empty()) ss << "'" << pausedScripts[i].msg << "' "; + ss << "released in '" << pausedScripts[i].ptr->GetName() << "'"; + CLEO_Log(eLogLevel::Debug, ss.str().c_str()); - if (CTimer::m_CodePause) - { - CLEO_Log(eLogLevel::Debug, "Game unpaused"); - CTimer::m_CodePause = false; - } + if 
(CTimer::m_CodePause) + { + CLEO_Log(eLogLevel::Debug, "Game unpaused"); + CTimer::m_CodePause = false; + } - pausedScripts.erase(pausedScripts.begin() + i); + pausedScripts.erase(pausedScripts.begin() + i); - break; // breakpoint continue - } + break; // breakpoint continue } } } @@ -256,6 +254,7 @@ class DebugUtils { auto filestr = CLEO_ReadStringOpcodeParam(thread); + // normalized absolute filepath std::string filename(MAX_PATH, '\0'); const size_t len = strlen(filestr); for(size_t i = 0; i < len; i++) diff --git a/cleo_plugins/DebugUtils/ScreenLog.cpp b/cleo_plugins/DebugUtils/ScreenLog.cpp index 70068763..719988ae 100644 --- a/cleo_plugins/DebugUtils/ScreenLog.cpp +++ b/cleo_plugins/DebugUtils/ScreenLog.cpp @@ -30,11 +30,17 @@ void ScreenLog::Add(eLogLevel level, const char* msg) } // calculate end time - auto end = DWORD(0.3f * timeDisplay); - end += DWORD(0.7f * timeDisplay * strlen(msg) / 40); // assume 40 characters as baseline - end += GetTime(); + auto duration = DWORD(0.2f * timeDisplay); + duration += DWORD(0.8f * timeDisplay * strlen(msg) / 40); // assume 40 characters as baseline + duration = min(duration, 3 * timeDisplay); + + /*auto startTime = GetTime(); + if(!entries.empty()) startTime = entries.front().endTime; + entries.emplace_front(level, msg, startTime + duration);*/ - entries.emplace_front(level, msg, end); + auto endTime = GetTime() + duration; + if (!entries.empty()) endTime = max(endTime, entries.front().endTime + 200); + entries.emplace_front(level, msg, endTime); if (entries.size() > maxMessages) { @@ -42,7 +48,7 @@ void ScreenLog::Add(eLogLevel level, const char* msg) } // update scroll pos - float sizeY = fontSize * static_cast(RsGlobal.maximumHeight) / 448.0f; + float sizeY = fontSize * RsGlobal.maximumHeight / 448.0f; size_t lines = CountLines(std::string(msg)); scrollOffset += 18.0f * lines * sizeY; } @@ -97,23 +103,20 @@ void ScreenLog::Draw() { auto& entry = entries[i]; - // carry on from any following text that is longer - 
auto endTime = entry.endTime; - for (size_t j = i + 1; j < entries.size(); j++) - { - endTime = max(endTime, entries[j].endTime); - } - BYTE alpha = 255; - if (endTime < now) + if (entry.endTime < now) { - auto elapsed = now - endTime; + auto elapsed = now - entry.endTime; float fadeProgress = (float)elapsed / timeFadeout; fadeProgress = std::clamp(fadeProgress, 0.0f, 1.0f); fadeProgress = 1.0f - fadeProgress; // fade out fadeProgress = sqrtf(fadeProgress); alpha = (BYTE)(fadeProgress * 0xFF); } + else if(entry.endTime > (now + 4 * timeDisplay)) + { + entry.endTime = now + 4 * timeDisplay; + } auto color = fontColor[(size_t)entry.level]; alpha = min(alpha, color.a); diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 2f533cd6..48a3505b 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -650,8 +650,6 @@ namespace CLEO { // perform 'sprintf'-operation for parameters, passed through SCM int ReadFormattedString(CRunningScript *thread, char *outputStr, size_t len, const char *format) { - memset(outputStr, 0, len); - unsigned int written = 0; const char *iter = format; char bufa[256], fmtbufa[64], *fmta; @@ -2370,7 +2368,7 @@ namespace CLEO { else dst = &GetScriptParamPointer(thread)->cParam; ReadStringParam(thread, fmt, sizeof(fmt)); - ReadFormattedString(thread, dst, MAX_STR_LEN, fmt); + ReadFormattedString(thread, dst, MAX_STR_LEN, fmt); // TODO: get actual length limit based on target type return OR_CONTINUE; } diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 88e64919..9b88fba7 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -420,6 +420,7 @@ namespace CLEO void __fastcall HOOK_ProcessScript(CCustomScript * pScript, int) { + // run registered callbacks bool process = true; for (void* func : GetInstance().GetCallbacks(eCallbackId::ScriptProcess)) { From ae56681b3e19e4e695b2def123758bb7df9a07ef Mon Sep 17 00:00:00 2001 From: Miran Date: Fri, 13 Oct 2023 20:10:25 
+0200 Subject: [PATCH 020/216] fixup! Allow replacing CLEO's opcodes. --- cleo_plugins/DebugUtils/ScreenLog.cpp | 57 +++++++++++++-------------- cleo_plugins/DebugUtils/ScreenLog.h | 16 +++++--- 2 files changed, 38 insertions(+), 35 deletions(-) diff --git a/cleo_plugins/DebugUtils/ScreenLog.cpp b/cleo_plugins/DebugUtils/ScreenLog.cpp index 719988ae..6c7d1476 100644 --- a/cleo_plugins/DebugUtils/ScreenLog.cpp +++ b/cleo_plugins/DebugUtils/ScreenLog.cpp @@ -18,7 +18,7 @@ void ScreenLog::Init() level = (eLogLevel)GetPrivateProfileInt("ScreenLog", "Level", (UINT)eLogLevel::None, config.c_str()); maxMessages = GetPrivateProfileInt("ScreenLog", "MessagesMax", 40, config.c_str()); timeDisplay = GetPrivateProfileInt("ScreenLog", "MessageTime", 6000, config.c_str()); - timeFadeout = 2000; + timeFadeout = 3000; fontSize = 0.01f * GetPrivateProfileInt("ScreenLog", "FontSize", 60, config.c_str()); } @@ -29,18 +29,7 @@ void ScreenLog::Add(eLogLevel level, const char* msg) return; } - // calculate end time - auto duration = DWORD(0.2f * timeDisplay); - duration += DWORD(0.8f * timeDisplay * strlen(msg) / 40); // assume 40 characters as baseline - duration = min(duration, 3 * timeDisplay); - - /*auto startTime = GetTime(); - if(!entries.empty()) startTime = entries.front().endTime; - entries.emplace_front(level, msg, startTime + duration);*/ - - auto endTime = GetTime() + duration; - if (!entries.empty()) endTime = max(endTime, entries.front().endTime + 200); - entries.emplace_front(level, msg, endTime); + entries.emplace_front(level, msg, timeDisplay); if (entries.size() > maxMessages) { @@ -67,12 +56,10 @@ void ScreenLog::Draw() scrollOffset = 0.0f; prevTime = currTime; - const auto now = GetTime(); // miliseconds - // clean up expired entries while(!entries.empty()) { - if(entries.back().endTime + timeFadeout < now) + if(entries.back().timeLeft < (-0.001f * timeFadeout)) entries.pop_back(); else break; @@ -99,24 +86,35 @@ void ScreenLog::Draw() float posX = 15.0f * sizeX; 
float posY = 7.0f * sizeY - scrollOffset; - for (size_t i = 0; i < entries.size(); i++) + // count total lines + int lines = 0; + for (auto& entry : entries) + { + lines += CountLines(entry.msg); + } + + float elapsed = 0.001f * (CTimer::m_snTimeInMilliseconds - CTimer::m_snPreviousTimeInMilliseconds); + float rowTime = -0.001f * timeFadeout; + for(auto it = entries.rbegin(); it != entries.rend(); it++) { - auto& entry = entries[i]; + auto& entry = *it; + + if(entry.timeLeft > 0.0f && entry.timeLeft < elapsed) + entry.timeLeft = 0.0f; // do not skip fade + else + entry.timeLeft -= elapsed; + rowTime = max(rowTime, entry.timeLeft); // carred on from older entries + BYTE alpha = 255; - if (entry.endTime < now) + if (rowTime < 0) { - auto elapsed = now - entry.endTime; - float fadeProgress = (float)elapsed / timeFadeout; + float fadeProgress = -rowTime / (0.001f * timeFadeout); fadeProgress = std::clamp(fadeProgress, 0.0f, 1.0f); fadeProgress = 1.0f - fadeProgress; // fade out fadeProgress = sqrtf(fadeProgress); alpha = (BYTE)(fadeProgress * 0xFF); - } - else if(entry.endTime > (now + 4 * timeDisplay)) - { - entry.endTime = now + 4 * timeDisplay; - } + }; auto color = fontColor[(size_t)entry.level]; alpha = min(alpha, color.a); @@ -127,10 +125,9 @@ void ScreenLog::Draw() alpha = std::clamp(int(alpha * alpha) / 255, 0, 255); // corrected for fadeout CFont::SetDropColor(CRGBA(0, 0, 0, alpha)); - CFont::PrintString(posX, posY, entry.msg.c_str()); - - size_t lines = CountLines(entry.msg); - posY += 18.0f * sizeY * lines; + lines -= CountLines(entry.msg); + float y = posY + 18.0f * sizeY * lines; + CFont::PrintString(posX, y, entry.msg.c_str()); } } diff --git a/cleo_plugins/DebugUtils/ScreenLog.h b/cleo_plugins/DebugUtils/ScreenLog.h index 31249293..cb3e0773 100644 --- a/cleo_plugins/DebugUtils/ScreenLog.h +++ b/cleo_plugins/DebugUtils/ScreenLog.h @@ -34,21 +34,23 @@ class ScreenLog { eLogLevel level; std::string msg; - DWORD endTime; + float timeLeft; Entry() : 
level(eLogLevel::Default), msg(""), - endTime(0) + timeLeft(0.0f) { } - Entry(eLogLevel level, const char* msg, DWORD endTime) : - level(level), - endTime(endTime) + Entry(eLogLevel level, const char* msg, DWORD durationMs) : + level(level) { if(msg != nullptr) { + timeLeft = min(strlen(msg), 200) * 0.06f; // 17 letters peer second reading speed + timeLeft = max(timeLeft, 0.001f * durationMs); + auto len = strlen(msg); this->msg.reserve(len); @@ -62,6 +64,10 @@ class ScreenLog this->msg.push_back(c); } } + else + { + timeLeft = 0.0f; + } } }; From 6f5126a293a072657fe213ae7c14b0f360acc4ed Mon Sep 17 00:00:00 2001 From: Miran Date: Sun, 15 Oct 2023 01:37:21 +0200 Subject: [PATCH 021/216] Fix to WriteStringParam. --- source/CCustomOpcodeSystem.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 48a3505b..8858bd68 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -621,7 +621,6 @@ namespace CLEO { case DT_LVAR_TEXTLABEL_ARRAY: targetBuff = (char*)GetScriptParamPointer(thread); len = min(len, 7); // 8 with terminator - GetScriptParams(thread, 1); // param processed break; // long string variable @@ -631,12 +630,11 @@ namespace CLEO { case DT_LVAR_STRING_ARRAY: targetBuff = (char*)GetScriptParamPointer(thread); len = min(len, 15); // 16 with terminator - GetScriptParams(thread, 1); // param processed break; default: { - GetScriptParams(thread, 1); // skip unhandled param + CLEO_SkipOpcodeParams(thread, 1); // skip unhandled param SHOW_ERROR("Outputing string into invalid argument type (%02X) in script %s", paramType, ((CCustomScript*)thread)->GetInfoStr().c_str()); return false; } From 6e8691b5dc2ec8806a376ae1a3589d1113909e98 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sun, 15 Oct 2023 03:50:45 +0200 Subject: [PATCH 022/216] Added missing changes to CHANGELOG.md (#112) * Added missing changes to CHANGELOG.md * Header style updated. 
--- CHANGELOG.md | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e8b7527f..a020f122 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ ## 4.5.0 +- introduced CLEO modules feature - introduced DebugUtils plugin - new opcode 00C3 (debug_on) - new opcode 00C4 (debug_off) @@ -7,23 +8,33 @@ - new opcode 00CD (trace) - new opcode 00CE (log_to_file) - new opcode 0DD5 (get_game_platform) -- implemented support of opcodes 0662, 0663 and 0664 (original R* script debugging opcodes. See DebugUtils.ini) -- updated project settings +- implemented support of opcodes 0662, 0663 and 0664 (original Rockstar's script debugging opcodes. See DebugUtils.ini) +- opcodes 0AAB, 0AE4, 0AE5, 0AE1, 0AE2 and 0AE3 moved from CLEO to File plugin. Adding "{$USE FILE}" might be required to compile some scripts +- introduced 'virtual absolute paths'. Use prefix in file path strings to access predefined locations: "0:\" game root, "1:\" game save files directory, "2:\" this script file directory, "3:\" cleo folder, "4:\" cleo\cleo_modules +- added more detailed error messages in some scenarios +- on some errors instead of crashing the game just invalid script is paused +- 0AB1 (cleo_call) and 0AB2 (cleo_return) scope now saves and restores GOSUB's call stack +- when reading less than 4 bytes with 0A9D (readfile) now remaining bytes of the target variable are set to zero +- fixed error in 004E (terminate_this_script) allowing to run multiple missions +- 'argument count' parameter of 0AB1 (cleo_call) is now optional. 'cleo_call @LABEL args 0' can be written as 'cleo_call @LABEL' +- 'argument count' parameter of 0AB2 (cleo_return) is now optional. 
'cleo_return 0' can be written as 'cleo_return' +- fixed handling of strings longer than 128 characters causing errors in some cases +- fixed error in handling of first string argument in 0AF5 (write_string to_ini_file) +#### SDK AND PLUGINS +- now all opcodes in range 0-7FFF can be registered by plugins - plugins moved to cleo\cleo_plugins directory -- opcodes 0AAB, 0AE4, 0AE5, 0AE1, 0AE2 and 0AE3 moved from CLEO to File plugin. Adding "{$USE FILE}" might be required to compile some scripts. -- rewritten Current Working Directory (editable with 0A99) handling. CWD changes no longer affects internal game's processes and are not globally shared among all scripts. -- added more detailed error messages in some cases -- on some errors instead of crashing game the invalid script is paused +- new SDK method: CLEO_ResolvePath +- new SDK method: CLEO_RegisterCallback +- new SDK method: CLEO_GetScriptVersion +- new SDK method: CLEO_GetScriptDebugMode +- new SDK method: CLEO_SetScriptDebugMode +- new SDK method: CLEO_Log +- new SDK method: CLEO_ReadParamsFormatted +#### CLEO internal +- updated project settings - updated general methods for getting and setting string parameters -- introduced 'virtual absolute paths'. Use prefix in file path strings to access predefined locations: "0:\" game root, "1:\" game save files directory, "2:\" this script file directory, "3:\" cleo folder, "4:\" cleo\cleo_modules -- updated opcodes handling. Now all opcodes in range 0-7FFF can be registered by CLEO plugins -- new CLEO SDK export: CLEO_ResolvePath -- new CLEO SDK export: CLEO_RegisterCallback -- new CLEO SDK export: CLEO_GetScriptVersion -- new CLEO SDK export: CLEO_GetScriptDebugMode -- new CLEO SDK export: CLEO_SetScriptDebugMode -- new CLEO SDK export: CLEO_Log -- new CLEO SDK export: CLEO_ReadParamsFormatted +- rewritten Current Working Directory (editable with 0A99) handling. 
CWD changes no longer affects internal game's processes and are not globally shared among all scripts +- updated opcodes handling ## 4.4.4 From 97b56ed32fef38a6a4f5dd31692d86055f48f1f7 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sun, 15 Oct 2023 05:00:27 +0200 Subject: [PATCH 023/216] Generate complete directory tree in release archives (#115) --- pack_release.bat | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pack_release.bat b/pack_release.bat index e286e4be..406ea7bb 100644 --- a/pack_release.bat +++ b/pack_release.bat @@ -8,6 +8,15 @@ echo Detected version: %fileVersion% SET outputFile=".\CLEO.SA_v%fileVersion%.zip" if exist %outputFile% del %outputFile% /q +mkdir pack_tmp +mkdir pack_tmp\cleo +mkdir pack_tmp\cleo\cleo_modules +mkdir pack_tmp\cleo\cleo_plugins +mkdir pack_tmp\cleo\cleo_saves +mkdir pack_tmp\cleo\cleo_text +%zip% a -tzip %outputFile% ".\pack_tmp\*" -r -bso0 +rmdir /s /q pack_tmp + %zip% a -tzip %outputFile% ".\Changelog.md" -bb2 | findstr "+" %zip% rn %outputFile% "Changelog.md" "cleo_readme\Changelog.txt" -bso0 From ab7b0b7b05456986906c8c9df278c35f3aadb771 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sun, 15 Oct 2023 05:01:46 +0200 Subject: [PATCH 024/216] Main menu cleo text aspect fix (#114) * Used CFont from plugin sdk instead of direct memory hacks. Fixed aspect of main menu CLEO text. --- CHANGELOG.md | 1 + CLEO4.vcxproj | 9 +++ CLEO4.vcxproj.filters | 9 +++ source/CGameMenu.cpp | 123 +++++++++++++--------------------- source/PluginSdkExternals.cpp | 5 +- 5 files changed, 67 insertions(+), 80 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a020f122..01d906c4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ - 'argument count' parameter of 0AB2 (cleo_return) is now optional. 
'cleo_return 0' can be written as 'cleo_return' - fixed handling of strings longer than 128 characters causing errors in some cases - fixed error in handling of first string argument in 0AF5 (write_string to_ini_file) +- fixed resolution dependent aspect ratio of CLEO text in main menu #### SDK AND PLUGINS - now all opcodes in range 0-7FFF can be registered by plugins - plugins moved to cleo\cleo_plugins directory diff --git a/CLEO4.vcxproj b/CLEO4.vcxproj index 29111b41..83e30b86 100644 --- a/CLEO4.vcxproj +++ b/CLEO4.vcxproj @@ -11,6 +11,15 @@ + + NotUsing + + + NotUsing + + + NotUsing + NotUsing diff --git a/CLEO4.vcxproj.filters b/CLEO4.vcxproj.filters index 488bddd2..af6942cd 100644 --- a/CLEO4.vcxproj.filters +++ b/CLEO4.vcxproj.filters @@ -48,6 +48,15 @@ plugin_sdk + + plugin_sdk + + + plugin_sdk + + + plugin_sdk + source\utils diff --git a/source/CGameMenu.cpp b/source/CGameMenu.cpp index 0badc8f3..51410aa3 100644 --- a/source/CGameMenu.cpp +++ b/source/CGameMenu.cpp @@ -2,52 +2,15 @@ #include "CGameMenu.h" #include "CleoBase.h" #include "CDebug.h" +#include "CFont.h" #include namespace CLEO { - void(__cdecl * TextDraw) (float x, float y, const char* text); - void(__cdecl * SetTextAlign) (BYTE nAlign); - void(__cdecl * SetTextFont) (BYTE nFont); - void(__cdecl* SetTextEdge) (char nEdge); - void(__cdecl * SetLetterSize) (float w, float h); - void(__cdecl * SetLetterColor) (RGBA color); + CMenuManager* MenuManager; - // thiscalls dont work with memory_pointer operator overload template - DWORD CMenuManager__ScaleX; - DWORD CMenuManager__ScaleY; - DWORD CTexture__DrawInRect; - //void (__thiscall * CTexture__DrawInRect) (void *texture, RwRect2D *rect, RwRGBA color); - - CMenuManager * MenuManager; - - float CGameMenu_ScaleX(CMenuManager *pMenu, float w) - { - float fReturn; - _asm - { - mov ecx, pMenu - push w - call CMenuManager__ScaleX - fstp fReturn - } - return fReturn; - } - - float CGameMenu_ScaleY(CMenuManager *pMenu, float w) - { - float fReturn; - _asm 
- { - mov ecx, pMenu - push w - call CMenuManager__ScaleY - fstp fReturn - } - return fReturn; - } - - void CTexture_DrawInRect(void *pTexture, RwRect2D *rect, RwRGBA *colour) + DWORD CTexture__DrawInRect; // original address + void CTexture_DrawInRect(void* pTexture, RwRect2D* rect, RwRGBA* colour) { _asm { @@ -60,36 +23,56 @@ namespace CLEO void __fastcall OnDrawMenuBackground(void *texture, int dummy, RwRect2D *rect, RwRGBA *color) { - CTexture_DrawInRect(texture, rect, color); + CTexture_DrawInRect(texture, rect, color); // call original + + CFont::SetBackground(false, false); + CFont::SetWrapx(640.0f); + CFont::SetFontStyle(FONT_MENU); + CFont::SetProportional(true); + CFont::SetOrientation(ALIGN_LEFT); + + CFont::SetColor({ 0xAD, 0xCE, 0xC4, 0xFF }); + CFont::SetEdge(1); + CFont::SetDropColor({ 0x00, 0x00, 0x00, 0xFF }); + + const float fontSize = 0.5f / 448.0f; + const float aspect = (float)RsGlobal.maximumWidth / RsGlobal.maximumHeight; + const float subtextHeight = 0.75f; // factor of first line height + float sizeX = fontSize * 0.5f / aspect * RsGlobal.maximumWidth; + float sizeY = fontSize * RsGlobal.maximumHeight; + + float posX = 25.0f * sizeX; // left margin + float posY = RsGlobal.maximumHeight - 15.0f * sizeY; // bottom margin + auto cs_count = GetInstance().ScriptEngine.WorkingScriptsCount(); auto plugin_count = GetInstance().PluginSystem.GetNumPlugins(); - std::ostringstream cleo_text; - cleo_text << "CLEO v" << CLEO_VERSION_STR; + if (cs_count || plugin_count) + { + posY -= 15.0f * sizeY; // add space for bottom text + } + + // draw CLEO version text + std::ostringstream text; + text << "CLEO v" << CLEO_VERSION_STR; #ifdef _DEBUG - cleo_text << " DEBUG"; + text << " ~r~~h~DEBUG"; #endif - SetTextAlign(1); - SetTextFont(2); - if (SetTextEdge) SetTextEdge(1); - SetLetterSize(CGameMenu_ScaleX(MenuManager, 0.23f), CGameMenu_ScaleY(MenuManager, 0.4f)); - - SetLetterColor(RGBA(/*0xE1, 0xE1, 0xE1, 0xFF*/0xAD, 0xCE, 0xC4, 0xFF)); - 
TextDraw(CGameMenu_ScaleX(MenuManager, 6.0f), CGameMenu_ScaleY(MenuManager, 428.0f), cleo_text.str().c_str()); - cleo_text.str(""); + CFont::SetScale(sizeX, sizeY); + CFont::PrintString(posX, posY - 15.0f * sizeY, text.str().c_str()); + // draw plugins / scripts text if (cs_count || plugin_count) { - if (plugin_count) cleo_text << plugin_count << (plugin_count > 1 ? " plugins" : " plugin"); - if (cs_count && plugin_count) cleo_text << " / "; - if (cs_count) cleo_text << cs_count << (cs_count > 1 ? " scripts" : " script"); - //plugin_text << plugin_count << (plugin_count > 1 ? " plugins" : " plugin"); - SetTextAlign(1); - //SetTextFont(2); - - SetLetterSize(CGameMenu_ScaleX(MenuManager, 0.18f), CGameMenu_ScaleY(MenuManager, 0.34f)); - - SetLetterColor(RGBA(/*0xE1, 0xE1, 0xE1, 0xFF*/0xAD, 0xCE, 0xC4, 0xFF)); - TextDraw(CGameMenu_ScaleX(MenuManager, 6.0f), CGameMenu_ScaleY(MenuManager, 436.0f), cleo_text.str().c_str()); + text.str(""); // clear + if (plugin_count) text << plugin_count << (plugin_count > 1 ? " plugins" : " plugin"); + if (cs_count && plugin_count) text << " / "; + if (cs_count) text << cs_count << (cs_count > 1 ? " scripts" : " script"); + + posY += 15.0f * sizeY; // line feed + sizeX *= subtextHeight; + sizeY *= subtextHeight; + CFont::SetScale(sizeX, sizeY); + CFont::PrintString(posX, posY - 15.0f * sizeY, text.str().c_str()); } // execute callbacks @@ -109,20 +92,8 @@ namespace CLEO TRACE("Injecting MenuStatusNotifier..."); CGameVersionManager& gvm = GetInstance().VersionManager; MenuManager = gvm.TranslateMemoryAddress(MA_MENU_MANAGER); - TextDraw = gvm.TranslateMemoryAddress(MA_DRAW_TEXT_FUNCTION); - SetTextAlign = gvm.TranslateMemoryAddress(MA_SET_TEXT_ALIGN_FUNCTION); - SetTextFont = gvm.TranslateMemoryAddress(MA_SET_TEXT_FONT_FUNCTION); - SetTextEdge = gvm.TranslateMemoryAddress(MA_SET_TEXT_EDGE_FUNCTION); - - // Meh... 
- CMenuManager__ScaleX = gvm.TranslateMemoryAddress(MA_CMENU_SCALE_X_FUNCTION); - CMenuManager__ScaleY = gvm.TranslateMemoryAddress(MA_CMENU_SCALE_Y_FUNCTION); - - SetLetterSize = gvm.TranslateMemoryAddress(MA_SET_LETTER_SIZE_FUNCTION); - SetLetterColor = gvm.TranslateMemoryAddress(MA_SET_LETTER_COLOR_FUNCTION); inj.MemoryReadOffset(gvm.TranslateMemoryAddress(MA_CALL_CTEXTURE_DRAW_BG_RECT).address + 1, CTexture__DrawInRect, true); - //gvm.TranslateMemoryAddress(MA_CTEXTURE_DRAW_IN_RECT_FUNCTION); inj.ReplaceFunction(OnDrawMenuBackground, gvm.TranslateMemoryAddress(MA_CALL_CTEXTURE_DRAW_BG_RECT)); } } diff --git a/source/PluginSdkExternals.cpp b/source/PluginSdkExternals.cpp index b4c7eaf8..e10c864b 100644 --- a/source/PluginSdkExternals.cpp +++ b/source/PluginSdkExternals.cpp @@ -1,11 +1,8 @@ #include "stdafx.h" #include "CVector.h" -#include "CRGBA.h" #include "CPed.h" bool CPed::IsPlayer() { return m_nPedType == PED_TYPE_PLAYER1 || m_nPedType == PED_TYPE_PLAYER2; } float CVector::Magnitude() { return sqrtf(x * x + y * y + z * z); } CVector::CVector() : x(0.0f), y(0.0f), z(0.0f) { } -CVector::CVector(float _x, float _y, float _z) : x(_x), y(_y), z(_z) { } -CRGBA::CRGBA(unsigned char _r, unsigned char _g, unsigned char _b, unsigned char _a) : r(_r), g(_g), b(_b), a(_a) { } -CRGBA &CRGBA::operator=(const CRGBA &rgba) { r = rgba.r; g = rgba.g; b = rgba.b; a = rgba.a; return *this; } \ No newline at end of file +CVector::CVector(float _x, float _y, float _z) : x(_x), y(_y), z(_z) { } \ No newline at end of file From 78300239c9dda29418022acba4db9c9630f68c53 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sun, 15 Oct 2023 06:24:38 +0200 Subject: [PATCH 025/216] Added 2000 and 2001 opcodes. (#111) * Added 2000 and 2001 opcodes. 
* Added validation of script pointer parameter in opcode 2001 --- CHANGELOG.md | 2 ++ source/CCustomOpcodeSystem.cpp | 39 +++++++++++++++++++++++++++++++++- source/CScriptEngine.cpp | 17 +++++++++++++++ source/CScriptEngine.h | 1 + 4 files changed, 58 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 01d906c4..ac44a5db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ - new opcode 00CD (trace) - new opcode 00CE (log_to_file) - new opcode 0DD5 (get_game_platform) +- new opcode 2000 (resolve_filepath) +- new opcode 2001 (get_script_name) - implemented support of opcodes 0662, 0663 and 0664 (original Rockstar's script debugging opcodes. See DebugUtils.ini) - opcodes 0AAB, 0AE4, 0AE5, 0AE1, 0AE2 and 0AE3 moved from CLEO to File plugin. Adding "{$USE FILE}" might be required to compile some scripts - introduced 'virtual absolute paths'. Use prefix in file path strings to access predefined locations: "0:\" game root, "1:\" game save files directory, "2:\" this script file directory, "3:\" cleo folder, "4:\" cleo\cleo_modules diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 8858bd68..ac326fa8 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -117,8 +117,9 @@ namespace CLEO { OpcodeResult __stdcall opcode_0AED(CRunningScript *thread); OpcodeResult __stdcall opcode_0AEE(CRunningScript *thread); OpcodeResult __stdcall opcode_0AEF(CRunningScript *thread); - OpcodeResult __stdcall opcode_0DD5(CRunningScript* thread); // get_platform + OpcodeResult __stdcall opcode_2000(CRunningScript* thread); // resolve_filepath + OpcodeResult __stdcall opcode_2001(CRunningScript* thread); // get_script_filename typedef void(*FuncScriptDeleteDelegateT) (CRunningScript *script); struct ScriptDeleteDelegate { @@ -339,6 +340,8 @@ namespace CLEO { CLEO_RegisterOpcode(0x0AEE, opcode_0AEE); CLEO_RegisterOpcode(0x0AEF, opcode_0AEF); CLEO_RegisterOpcode(0x0DD5, opcode_0DD5); // get_platform + 
CLEO_RegisterOpcode(0x2000, opcode_2000); // resolve_filepath + CLEO_RegisterOpcode(0x2001, opcode_2001); // get_script_filename } void CCustomOpcodeSystem::Inject(CCodeInjector& inj) @@ -2781,6 +2784,40 @@ namespace CLEO { *thread << PLATFORM_WINDOWS; return OR_CONTINUE; } + + //2000=2,%2s% = resolve_filepath %1s% + OpcodeResult __stdcall opcode_2000(CRunningScript* thread) + { + auto path = CLEO_ReadStringOpcodeParam(thread); + CLEO_ResolvePath(thread, path, MAX_STR_LEN); + CLEO_WriteStringOpcodeParam(thread, path); + return OR_CONTINUE; + } + + //2001=2,%2s% = get_script_filename %1d% // IF and SET + OpcodeResult __stdcall opcode_2001(CRunningScript* thread) + { + CCustomScript* script; + *thread >> script; + + if((int)script == -1) + { + script = (CCustomScript*)thread; // current script + } + else + { + if(!GetInstance().ScriptEngine.IsValidScriptPtr(script)) + { + CLEO_SkipOpcodeParams(thread, 1); // no result text + SetScriptCondResult(thread, false); // invalid input param + return OR_CONTINUE; + } + } + + CLEO_WriteStringOpcodeParam(thread, script->GetScriptFileName()); + SetScriptCondResult(thread, true); + return OR_CONTINUE; + } } diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 9b88fba7..2c117612 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -1120,6 +1120,23 @@ namespace CLEO return nullptr; } + bool CScriptEngine::IsValidScriptPtr(const CRunningScript* ptr) const + { + for (auto script = *activeThreadQueue; script != nullptr; script = script->GetNext()) + { + if (script == ptr) + return true; + } + + for (const auto script : CustomScripts) + { + if (script == ptr) + return true; + } + + return false; + } + void CScriptEngine::AddCustomScript(CCustomScript *cs) { if (cs->IsMission()) diff --git a/source/CScriptEngine.h b/source/CScriptEngine.h index 942041db..db9eb44d 100644 --- a/source/CScriptEngine.h +++ b/source/CScriptEngine.h @@ -122,6 +122,7 @@ namespace CLEO CRunningScript* FindScriptNamed(const 
char *); CCustomScript* FindCustomScriptNamed(const char*); + bool IsValidScriptPtr(const CRunningScript*) const; // leads to active script? (regular or custom) void AddCustomScript(CCustomScript*); void RemoveCustomScript(CCustomScript*); void RemoveAllCustomScripts(); From 03c65b1d6ef991d5e2688e3a4bf57d6eb12b09aa Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Mon, 16 Oct 2023 03:39:10 +0200 Subject: [PATCH 026/216] Added callback for drawing finished. (#116) Updated clearing log list effect. Callback 'ScriptDraw' called after script elements drawing. --- cleo_plugins/DebugUtils/DebugUtils.cpp | 45 +++++++++++++------------- cleo_plugins/DebugUtils/ScreenLog.cpp | 20 +++++++++--- cleo_sdk/CLEO.h | 2 +- source/CGameMenu.cpp | 12 +------ source/CScriptEngine.cpp | 9 ++++-- source/CleoBase.cpp | 12 +++++++ source/CleoBase.h | 2 ++ 7 files changed, 60 insertions(+), 42 deletions(-) diff --git a/cleo_plugins/DebugUtils/DebugUtils.cpp b/cleo_plugins/DebugUtils/DebugUtils.cpp index d62e9f3e..e8fdcf8e 100644 --- a/cleo_plugins/DebugUtils/DebugUtils.cpp +++ b/cleo_plugins/DebugUtils/DebugUtils.cpp @@ -53,11 +53,10 @@ class DebugUtils } // register event callbacks - CLEO_RegisterCallback(eCallbackId::ScriptsFinalize, OnScriptsFinalize); - CLEO_RegisterCallback(eCallbackId::ScriptDraw, OnScriptDraw); - CLEO_RegisterCallback(eCallbackId::MenuDraw, OnMenuDraw); - CLEO_RegisterCallback(eCallbackId::ScriptProcess, OnScriptProcess); CLEO_RegisterCallback(eCallbackId::Log, OnLog); + CLEO_RegisterCallback(eCallbackId::DrawingFinished, OnDrawingFinished); + CLEO_RegisterCallback(eCallbackId::ScriptProcess, OnScriptProcess); + CLEO_RegisterCallback(eCallbackId::ScriptsFinalize, OnScriptsFinalize); } else { @@ -75,35 +74,35 @@ class DebugUtils logFiles.clear(); // close all } - static void WINAPI OnScriptDraw(bool beforeFade) - { - if (beforeFade) return; // skip drawing before fade pass - - OnMenuDraw(); - } - - static void WINAPI OnMenuDraw() + static void WINAPI OnDrawingFinished() 
{ // log messages screenLog.Draw(); // draw active breakpoints list - for (size_t i = 0; i < pausedScripts.size(); i++) + if(!pausedScripts.empty()) { - std::ostringstream ss; - ss << "Script '" << pausedScripts[i].ptr->GetName() << "' breakpoint"; - - if(!pausedScripts[i].msg.empty()) // named breakpoint + for (size_t i = 0; i < pausedScripts.size(); i++) { - ss << " '" << pausedScripts[i].msg << "'"; - } + std::ostringstream ss; + ss << "Script '" << pausedScripts[i].ptr->GetName() << "' breakpoint"; - if(i < KeyCount) - { - ss << " (F" << 5 + i << ")"; + if(!pausedScripts[i].msg.empty()) // named breakpoint + { + ss << " '" << pausedScripts[i].msg << "'"; + } + + if(i < KeyCount) + { + ss << " (F" << 5 + i << ")"; + } + + screenLog.DrawLine(ss.str().c_str(), i); } - screenLog.DrawLine(ss.str().c_str(), i); + // for some reason last string on print list is always drawn incorrectly + // Walkaround: add one extra dummy line then + screenLog.DrawLine("_~n~_~n~_", 500); } // update keys state diff --git a/cleo_plugins/DebugUtils/ScreenLog.cpp b/cleo_plugins/DebugUtils/ScreenLog.cpp index 6c7d1476..1ea6c538 100644 --- a/cleo_plugins/DebugUtils/ScreenLog.cpp +++ b/cleo_plugins/DebugUtils/ScreenLog.cpp @@ -94,15 +94,23 @@ void ScreenLog::Draw() } float elapsed = 0.001f * (CTimer::m_snTimeInMilliseconds - CTimer::m_snPreviousTimeInMilliseconds); + float elapsedAlt = elapsed; float rowTime = -0.001f * timeFadeout; - for(auto it = entries.rbegin(); it != entries.rend(); it++) + for(auto it = entries.rbegin(); it != entries.rend(); it++) // draw from oldest { auto& entry = *it; - if(entry.timeLeft > 0.0f && entry.timeLeft < elapsed) - entry.timeLeft = 0.0f; // do not skip fade + if(entry.timeLeft > 0.0f) + { + if(entry.timeLeft < elapsedAlt) + entry.timeLeft = 0.0f; // do not skip fade + else + entry.timeLeft -= elapsedAlt; + } else - entry.timeLeft -= elapsed; + entry.timeLeft -= elapsed; // fade out + + elapsedAlt *= 0.98f; // keep every next line longer rowTime = 
max(rowTime, entry.timeLeft); // carred on from older entries @@ -129,6 +137,10 @@ void ScreenLog::Draw() float y = posY + 18.0f * sizeY * lines; CFont::PrintString(posX, y, entry.msg.c_str()); } + + // for some reason last string on print list is always drawn incorrectly + // Walkaround: add one extra dummy line then + CFont::PrintString(0.0f, -500.0f, "_~n~_~n~_"); } void ScreenLog::DrawLine(const char* msg, size_t row) diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index 5dec09d6..5346e39c 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -130,7 +130,7 @@ enum class eCallbackId : DWORD ScriptsFinalize, // void WINAPI OnScriptsFinalize(); ScriptProcess, // bool WINAPI OnScriptProcess(CRunningScript* pScript); // return false to skip this script processing ScriptDraw, // void WINAPI OnScriptDraw(bool beforeFade); - MenuDraw, // void WINAPI OnMenuDraw(); + DrawingFinished, // void WINAPI OnDrawingFinished(); // called after game rendered everything and before presenting screen buffer Log, // void OnLog(eLogLevel level, const char* msg); }; diff --git a/source/CGameMenu.cpp b/source/CGameMenu.cpp index 51410aa3..585dd49e 100644 --- a/source/CGameMenu.cpp +++ b/source/CGameMenu.cpp @@ -3,6 +3,7 @@ #include "CleoBase.h" #include "CDebug.h" #include "CFont.h" +#include "plugin.h" #include namespace CLEO @@ -74,17 +75,6 @@ namespace CLEO CFont::SetScale(sizeX, sizeY); CFont::PrintString(posX, posY - 15.0f * sizeY, text.str().c_str()); } - - // execute callbacks - auto& cleo = GetInstance(); - if (cleo.IsStarted()) - { - for (void* func : cleo.GetCallbacks(eCallbackId::MenuDraw)) - { - typedef void WINAPI callback(void); - ((callback*)func)(); - } - } } void CGameMenu::Inject(CCodeInjector& inj) diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 2c117612..00656da8 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -440,14 +440,17 @@ namespace CLEO { GetInstance().ScriptEngine.DrawScriptStuff(bBeforeFade); + if(bBeforeFade) + 
DrawScriptStuff_H(bBeforeFade); + else + DrawScriptStuff(bBeforeFade); + + // run registered callbacks for (void* func : GetInstance().GetCallbacks(eCallbackId::ScriptDraw)) { typedef void WINAPI callback(bool); ((callback*)func)(bBeforeFade != 0); } - - // restore SCM textures and return to the overwritten func (which may != DrawScriptSprites) - return bBeforeFade ? DrawScriptStuff_H(bBeforeFade) : DrawScriptStuff(bBeforeFade); } #define NUM_STORED_SPRITES 128 diff --git a/source/CleoBase.cpp b/source/CleoBase.cpp index 9429affa..fe8f03ad 100644 --- a/source/CleoBase.cpp +++ b/source/CleoBase.cpp @@ -47,6 +47,8 @@ namespace CLEO OpcodeSystem.Inject(CodeInjector); ScriptEngine.Inject(CodeInjector); + CodeInjector.ReplaceFunction(OnDrawingFinished, 0x00734640); // nullsub_63 - originally something like renderDebugStuff? + m_bStarted = true; } @@ -71,5 +73,15 @@ namespace CLEO { GetInstance().AddCallback(id, func); } + + void __cdecl CCleoInstance::OnDrawingFinished() + { + // execute callbacks + for (void* func : GetInstance().GetCallbacks(eCallbackId::DrawingFinished)) + { + typedef void WINAPI callback(void); + ((callback*)func)(); + } + } } diff --git a/source/CleoBase.h b/source/CleoBase.h index 60c675f5..99831f23 100644 --- a/source/CleoBase.h +++ b/source/CleoBase.h @@ -48,6 +48,8 @@ namespace CLEO void AddCallback(eCallbackId id, void* func); const std::set& GetCallbacks(eCallbackId id); + static void __cdecl OnDrawingFinished(); + void(__cdecl * UpdateGameLogics)(); static void __cdecl OnUpdateGameLogics(); }; From 2c7ec21132b7333ea6488d43fb5cafc63f85ca6d Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Mon, 16 Oct 2023 03:40:05 +0200 Subject: [PATCH 027/216] Added full_path argument to get_script_filename opcode. 
(#117) --- source/CCustomOpcodeSystem.cpp | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index ac326fa8..5c874801 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -7,6 +7,7 @@ #include "CModelInfo.h" #include +#include namespace CLEO { DWORD FUNC_fopen; @@ -2794,11 +2795,12 @@ namespace CLEO { return OR_CONTINUE; } - //2001=2,%2s% = get_script_filename %1d% // IF and SET + //2001=3,%3s% = get_script_filename %1d% full_path %2d% // IF and SET OpcodeResult __stdcall opcode_2001(CRunningScript* thread) { CCustomScript* script; - *thread >> script; + DWORD fullPath; + *thread >> script >> fullPath; if((int)script == -1) { @@ -2813,8 +2815,16 @@ namespace CLEO { return OR_CONTINUE; } } - - CLEO_WriteStringOpcodeParam(thread, script->GetScriptFileName()); + + if(fullPath != 0) + { + std::ostringstream ss; + ss << script->GetScriptFileDir() << "\\" << script->GetScriptFileName(); + CLEO_WriteStringOpcodeParam(thread, ss.str().c_str()); + } + else + CLEO_WriteStringOpcodeParam(thread, script->GetScriptFileName()); + SetScriptCondResult(thread, true); return OR_CONTINUE; } From 95f1e68f569b86e7a0bc49ce2737da93045a5739 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Tue, 17 Oct 2023 02:54:36 +0200 Subject: [PATCH 028/216] clearing mission locals on CLEO mission start (#118) --- CHANGELOG.md | 1 + source/CCustomOpcodeSystem.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ac44a5db..156ffc26 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ - fixed handling of strings longer than 128 characters causing errors in some cases - fixed error in handling of first string argument in 0AF5 (write_string to_ini_file) - fixed resolution dependent aspect ratio of CLEO text in main menu +- fixed clearing mission locals when new CLEO mission is started #### SDK AND PLUGINS - now all opcodes in range 
0-7FFF can be registered by plugins - plugins moved to cleo\cleo_plugins directory diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 5c874801..da436dfd 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -1273,6 +1273,7 @@ namespace CLEO { if (csscript->IsCustom()) cs->SetCompatibility(csscript->GetCompatibility()); GetInstance().ScriptEngine.AddCustomScript(cs); + memset(missionLocals, 0, 1024 * sizeof(SCRIPT_VAR)); // same as CTheScripts::WipeLocalVariableMemoryForMissionScript TransmitScriptParams(thread, (CRunningScript*)((BYTE*)missionLocals - 0x3C)); cs->SetDebugMode(reinterpret_cast(thread)->GetDebugMode()); } From da9dfd42acdf01ea24413a331fed5fec9c7b999b Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Fri, 20 Oct 2023 16:22:28 +0200 Subject: [PATCH 029/216] Fixed bug in 00CC (#121) Adjusted screen log display times --- cleo_plugins/DebugUtils/DebugUtils.cpp | 2 +- cleo_plugins/DebugUtils/DebugUtils.ini | 2 +- cleo_plugins/DebugUtils/ScreenLog.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cleo_plugins/DebugUtils/DebugUtils.cpp b/cleo_plugins/DebugUtils/DebugUtils.cpp index e8fdcf8e..394abc9c 100644 --- a/cleo_plugins/DebugUtils/DebugUtils.cpp +++ b/cleo_plugins/DebugUtils/DebugUtils.cpp @@ -229,7 +229,7 @@ class DebugUtils CTimer::m_CodePause = true; } - return OR_CONTINUE; + return OR_INTERRUPT; } // 00CD=-1, trace %1s% ... 
diff --git a/cleo_plugins/DebugUtils/DebugUtils.ini b/cleo_plugins/DebugUtils/DebugUtils.ini index 320557f0..1353be88 100644 --- a/cleo_plugins/DebugUtils/DebugUtils.ini +++ b/cleo_plugins/DebugUtils/DebugUtils.ini @@ -5,6 +5,6 @@ LegacyDebugOpcodes=0 [ScreenLog] ; Level: 0 - off, 1 - errors and warnings, 2 - debug messages, 3 - all Level=2 -MessageTime=8000 +MessageTime=4000 MessagesMax=35 FontSize=55 diff --git a/cleo_plugins/DebugUtils/ScreenLog.h b/cleo_plugins/DebugUtils/ScreenLog.h index cb3e0773..e69f9487 100644 --- a/cleo_plugins/DebugUtils/ScreenLog.h +++ b/cleo_plugins/DebugUtils/ScreenLog.h @@ -48,7 +48,7 @@ class ScreenLog { if(msg != nullptr) { - timeLeft = min(strlen(msg), 200) * 0.06f; // 17 letters peer second reading speed + timeLeft = min(strlen(msg), 200) * 0.08f; // 12 letters peer second reading speed timeLeft = max(timeLeft, 0.001f * durationMs); auto len = strlen(msg); From 62f7c7b18f6fcd743da69b14d21e3d5fed0734d3 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Fri, 20 Oct 2023 16:24:29 +0200 Subject: [PATCH 030/216] Fixed read bug when provided target buffer size is greater than 255. (#120) Now returned string is always null terminated. Removed filling remaining part of target buffer with zeros. 
--- source/CCustomOpcodeSystem.cpp | 60 ++++++++++++++++------------------ source/CCustomOpcodeSystem.h | 4 +-- 2 files changed, 31 insertions(+), 33 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index da436dfd..c8be6338 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -519,10 +519,10 @@ namespace CLEO { } // read string parameter according to convention on strings - char* ReadStringParam(CRunningScript *thread, char* buf = nullptr, BYTE size = 0) + char* ReadStringParam(CRunningScript *thread, char* buf, DWORD bufSize) { - if (size == 0) size = MAX_STR_LEN; - if(buf != nullptr) memset(buf, 0, size); + static char internal_buf[MAX_STR_LEN]; + if (!buf) { buf = internal_buf; bufSize = MAX_STR_LEN; } auto paramType = CLEO_GetOperandType(thread); switch(paramType) @@ -533,13 +533,20 @@ namespace CLEO { case DT_LVAR: case DT_VAR_ARRAY: case DT_LVAR_ARRAY: + { GetScriptParams(thread, 1); - if (buf != nullptr) - { - strncpy(buf, opcodeParams[0].pcParam, size - 1); - buf[size - 1] = '\0'; - } - return opcodeParams[0].pcParam; // original string pointer + char* str = opcodeParams[0].pcParam; + + size_t length = strlen(str); + if(bufSize > 0) + length = min(length, bufSize - 1); // minus terminator char + else + length = 0; // no target buffer + + if (length) strncpy(buf, str, length); + if (bufSize > 0) buf[length] = '\0'; // string terminator + return buf; + } // short string variable case DT_VAR_TEXTLABEL: @@ -558,33 +565,28 @@ namespace CLEO { case DT_STRING: case DT_VARLEN_STRING: { - if (buf == nullptr) // provide buffer if not specified - { - static char result[MAX_STR_LEN]; - buf = result; - size = sizeof(result); - memset(buf, 0, size); - } - if (paramType == DT_VARLEN_STRING) { // prococess here as GetScriptStringParam can not obtain strings with lenght greater than 128 thread->IncPtr(1); // already processed paramType - BYTE length = *thread->GetBytePointer(); // as unsigned! 
- thread->IncPtr(1); // length + DWORD length = (BYTE)*thread->GetBytePointer(); // as unsigned byte! + thread->IncPtr(1); // length info - if (length > 0) - { - auto count = min(size, length); - memcpy(buf, thread->GetBytePointer(), count); + char* str = (char*)thread->GetBytePointer(); + thread->IncPtr(length); // text data - thread->IncPtr(length); // skip read data - } + if (bufSize > 0) + length = min(length, bufSize - 1); // minus terminator char + else + length = 0; // no target buffer + + if (length) strncpy(buf, str, length); + if (bufSize > 0) buf[length] = '\0'; // string terminator } else { - GetScriptStringParam(thread, buf, size); // standard game's function + GetScriptStringParam(thread, buf, (BYTE)min(bufSize, 0xFF)); // standard game's function } return buf; @@ -650,7 +652,7 @@ namespace CLEO { } // perform 'sprintf'-operation for parameters, passed through SCM - int ReadFormattedString(CRunningScript *thread, char *outputStr, size_t len, const char *format) + int ReadFormattedString(CRunningScript *thread, char *outputStr, DWORD len, const char *format) { unsigned int written = 0; const char *iter = format; @@ -2884,10 +2886,6 @@ extern "C" LPSTR WINAPI CLEO_ReadStringPointerOpcodeParam(CLEO::CRunningScript* thread, char *buf, int size) { - static char internal_buf[MAX_STR_LEN]; - if (!buf) { buf = internal_buf; size = MAX_STR_LEN; } - if (!size) size = MAX_STR_LEN; - std::fill(buf, buf + size, '\0'); return ReadStringParam(thread, buf, size); } diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index 9f60279e..24adbc24 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -62,8 +62,8 @@ namespace CLEO extern void(__thiscall * ProcessScript)(CRunningScript*); - char* ReadStringParam(CRunningScript* thread, char* buf, BYTE size); + char* ReadStringParam(CRunningScript* thread, char* buf = nullptr, DWORD bufSize = 0); bool WriteStringParam(CRunningScript* thread, const char* str); - int 
ReadFormattedString(CRunningScript* thread, char* outputStr, size_t len, const char* format); + int ReadFormattedString(CRunningScript* thread, char* buf, DWORD bufSize, const char* format); void SkipUnusedParameters(CRunningScript* thread); // for var-args opcodes } From e6f5591bceb5654a8dd607d28cf209bcc040f3e5 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sun, 22 Oct 2023 17:07:02 +0200 Subject: [PATCH 031/216] ReadFormattedString result always null terminated (#124) * ReadFormattedString result always null terminated * Skip would not work here anyway. --- source/CCustomOpcodeSystem.cpp | 237 +++++++++++++++++---------------- 1 file changed, 123 insertions(+), 114 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index c8be6338..00235474 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -656,153 +656,162 @@ namespace CLEO { { unsigned int written = 0; const char *iter = format; + char* outIter = outputStr; char bufa[256], fmtbufa[64], *fmta; - while (*iter) + // invalid input arguments + if(outputStr == nullptr || len == 0) { - while (*iter && *iter != '%') - { - if (written++ >= len) - return -1; - *outputStr++ = *iter++; - } + SkipUnusedParameters(thread); + return -1; + } - if (*iter == '%') + if(len > 1 && format != nullptr) + { + while (*iter) { - if (iter[1] == '%') + while (*iter && *iter != '%') { - if (written++ >= len) - return -1; - *outputStr++ = '%'; /* "%%"->'%' */ - iter += 2; - continue; + if (written++ >= len) goto _ReadFormattedString_OutOfMemory; + *outIter++ = *iter++; } - //get flags and width specifier - fmta = fmtbufa; - *fmta++ = *iter++; - while (*iter == '0' || - *iter == '+' || - *iter == '-' || - *iter == ' ' || - *iter == '*' || - *iter == '#') + if (*iter == '%') { - if (*iter == '*') + if (iter[1] == '%') { - char *buffiter = bufa; - - //get width - if (CLEO_GetOperandType(thread) == DT_END) goto ReadFormattedString_ArgMissing; - GetScriptParams(thread, 1); - 
_itoa(opcodeParams[0].dwParam, buffiter, 10); - while (*buffiter) - *fmta++ = *buffiter++; + if (written++ >= len) goto _ReadFormattedString_OutOfMemory; + *outIter++ = '%'; /* "%%"->'%' */ + iter += 2; + continue; } - else - *fmta++ = *iter; - iter++; - } - - //get immidiate width value - while (isdigit(*iter)) - *fmta++ = *iter++; - //get precision - if (*iter == '.') - { + //get flags and width specifier + fmta = fmtbufa; *fmta++ = *iter++; - if (*iter == '*') + while (*iter == '0' || + *iter == '+' || + *iter == '-' || + *iter == ' ' || + *iter == '*' || + *iter == '#') { - char *buffiter = bufa; - if (CLEO_GetOperandType(thread) == DT_END) goto ReadFormattedString_ArgMissing; - GetScriptParams(thread, 1); - _itoa(opcodeParams[0].dwParam, buffiter, 10); - while (*buffiter) - *fmta++ = *buffiter++; + if (*iter == '*') + { + char *buffiter = bufa; + + //get width + if (CLEO_GetOperandType(thread) == DT_END) goto _ReadFormattedString_ArgMissing; + GetScriptParams(thread, 1); + _itoa(opcodeParams[0].dwParam, buffiter, 10); + while (*buffiter) + *fmta++ = *buffiter++; + } + else + *fmta++ = *iter; + iter++; } - else - while (isdigit(*iter)) - *fmta++ = *iter++; - } - //get size - if (*iter == 'h' || *iter == 'l') - *fmta++ = *iter++; - switch (*iter) - { - case 's': - { - static const char none[] = "(null)"; - if (CLEO_GetOperandType(thread) == DT_END) goto ReadFormattedString_ArgMissing; - const char *astr = ReadStringParam(thread); - const char *striter = astr ? 
astr : none; - while (*striter) + //get immidiate width value + while (isdigit(*iter)) + *fmta++ = *iter++; + + //get precision + if (*iter == '.') { - if (written++ >= len) - return -1; - *outputStr++ = *striter++; + *fmta++ = *iter++; + if (*iter == '*') + { + char *buffiter = bufa; + if (CLEO_GetOperandType(thread) == DT_END) goto _ReadFormattedString_ArgMissing; + GetScriptParams(thread, 1); + _itoa(opcodeParams[0].dwParam, buffiter, 10); + while (*buffiter) + *fmta++ = *buffiter++; + } + else + while (isdigit(*iter)) + *fmta++ = *iter++; } - iter++; - break; - } + //get size + if (*iter == 'h' || *iter == 'l') + *fmta++ = *iter++; - case 'c': - if (written++ >= len) - return -1; - if (CLEO_GetOperandType(thread) == DT_END) goto ReadFormattedString_ArgMissing; - GetScriptParams(thread, 1); - *outputStr++ = (char)opcodeParams[0].nParam; - iter++; - break; - - default: - { - /* For non wc types, use system sprintf and append to wide char output */ - /* FIXME: for unrecognised types, should ignore % when printing */ - char *bufaiter = bufa; - if (*iter == 'p' || *iter == 'P') + switch (*iter) { - if (CLEO_GetOperandType(thread) == DT_END) goto ReadFormattedString_ArgMissing; - GetScriptParams(thread, 1); - sprintf(bufaiter, "%08X", opcodeParams[0].dwParam); + case 's': + { + static const char none[] = "(null)"; + if (CLEO_GetOperandType(thread) == DT_END) goto _ReadFormattedString_ArgMissing; + const char *astr = ReadStringParam(thread); + const char *striter = astr ? 
astr : none; + while (*striter) + { + if (written++ >= len) goto _ReadFormattedString_OutOfMemory; + *outIter++ = *striter++; + } + iter++; + break; } - else + + case 'c': + if (written++ >= len) goto _ReadFormattedString_OutOfMemory; + if (CLEO_GetOperandType(thread) == DT_END) goto _ReadFormattedString_ArgMissing; + GetScriptParams(thread, 1); + *outIter++ = (char)opcodeParams[0].nParam; + iter++; + break; + + default: { - *fmta++ = *iter; - *fmta = '\0'; - if (*iter == 'a' || *iter == 'A' || - *iter == 'e' || *iter == 'E' || - *iter == 'f' || *iter == 'F' || - *iter == 'g' || *iter == 'G') + /* For non wc types, use system sprintf and append to wide char output */ + /* FIXME: for unrecognised types, should ignore % when printing */ + char *bufaiter = bufa; + if (*iter == 'p' || *iter == 'P') { - if (CLEO_GetOperandType(thread) == DT_END) goto ReadFormattedString_ArgMissing; + if (CLEO_GetOperandType(thread) == DT_END) goto _ReadFormattedString_ArgMissing; GetScriptParams(thread, 1); - sprintf(bufaiter, fmtbufa, opcodeParams[0].fParam); + sprintf(bufaiter, "%08X", opcodeParams[0].dwParam); } else { - if (CLEO_GetOperandType(thread) == DT_END) goto ReadFormattedString_ArgMissing; - GetScriptParams(thread, 1); - sprintf(bufaiter, fmtbufa, opcodeParams[0].pParam); + *fmta++ = *iter; + *fmta = '\0'; + if (*iter == 'a' || *iter == 'A' || + *iter == 'e' || *iter == 'E' || + *iter == 'f' || *iter == 'F' || + *iter == 'g' || *iter == 'G') + { + if (CLEO_GetOperandType(thread) == DT_END) goto _ReadFormattedString_ArgMissing; + GetScriptParams(thread, 1); + sprintf(bufaiter, fmtbufa, opcodeParams[0].fParam); + } + else + { + if (CLEO_GetOperandType(thread) == DT_END) goto _ReadFormattedString_ArgMissing; + GetScriptParams(thread, 1); + sprintf(bufaiter, fmtbufa, opcodeParams[0].pParam); + } + } + while (*bufaiter) + { + if (written++ >= len) goto _ReadFormattedString_OutOfMemory; + *outIter++ = *bufaiter++; } + iter++; + break; } - while (*bufaiter) - { - if (written++ >= 
len) - return -1; - *outputStr++ = *bufaiter++; } - iter++; - break; - } } } } if (written >= len) { + _ReadFormattedString_OutOfMemory: // jump here on error LOG_WARNING("Read formatted string error: Insufficient output buffer size in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); + SkipUnusedParameters(thread); + outputStr[len - 1] = '\0'; return -1; } @@ -813,13 +822,13 @@ namespace CLEO { } SkipUnusedParameters(thread); // skip terminator too - *outputStr++ = '\0'; + outputStr[written] = '\0'; return (int)written; - ReadFormattedString_ArgMissing: - thread->IncPtr(); // skip vararg terminator + _ReadFormattedString_ArgMissing: // jump here on error LOG_WARNING("Read formatted string: Not enough arguments to fulfill specified format in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); - CCustomOpcodeSystem::ErrorSuspendScript(thread); + thread->IncPtr(); // skip vararg terminator + outputStr[written] = '\0'; return (int)written; } From 63692a73849be4228d48cf9668c103961bab381a Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sun, 22 Oct 2023 17:10:59 +0200 Subject: [PATCH 032/216] Small features and refactoring (#125) CLEO.h functions sorted. White characters unified. 
Added error checking and messages to cleo_return --- CHANGELOG.md | 7 ++- cleo_plugins/DebugUtils/DebugUtils.cpp | 6 +- cleo_plugins/DebugUtils/Utils.h | 11 +--- cleo_sdk/CLEO.h | 82 +++++++++++++------------ cleo_sdk/CLEO.lib | Bin 9208 -> 10440 bytes source/CCustomOpcodeSystem.cpp | 77 ++++++++++++++++------- source/CCustomOpcodeSystem.h | 3 +- source/CScriptEngine.cpp | 14 +++++ source/cleo.def | 18 +++--- 9 files changed, 131 insertions(+), 87 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 156ffc26..cdf3c5d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,13 +27,16 @@ #### SDK AND PLUGINS - now all opcodes in range 0-7FFF can be registered by plugins - plugins moved to cleo\cleo_plugins directory -- new SDK method: CLEO_ResolvePath - new SDK method: CLEO_RegisterCallback +- new SDK method: CLEO_GetVarArgCount +- new SDK method: CLEO_SkipUnusedVarArgs +- new SDK method: CLEO_ReadParamsFormatted - new SDK method: CLEO_GetScriptVersion +- new SDK method: CLEO_GetScriptInfoStr +- new SDK method: CLEO_ResolvePath - new SDK method: CLEO_GetScriptDebugMode - new SDK method: CLEO_SetScriptDebugMode - new SDK method: CLEO_Log -- new SDK method: CLEO_ReadParamsFormatted #### CLEO internal - updated project settings - updated general methods for getting and setting string parameters diff --git a/cleo_plugins/DebugUtils/DebugUtils.cpp b/cleo_plugins/DebugUtils/DebugUtils.cpp index 394abc9c..25cc482f 100644 --- a/cleo_plugins/DebugUtils/DebugUtils.cpp +++ b/cleo_plugins/DebugUtils/DebugUtils.cpp @@ -190,7 +190,7 @@ class DebugUtils { if (!CLEO_GetScriptDebugMode(thread)) { - SkipUnusedParams(thread); + CLEO_SkipUnusedVarArgs(thread); return OR_CONTINUE; } @@ -237,7 +237,7 @@ class DebugUtils { if (!CLEO_GetScriptDebugMode(thread)) { - SkipUnusedParams(thread); + CLEO_SkipUnusedVarArgs(thread); return OR_CONTINUE; } @@ -279,7 +279,7 @@ class DebugUtils ss << "Failed to open log file '" << filename << "'"; CLEO_Log(eLogLevel::Error, ss.str().c_str()); - 
SkipUnusedParams(thread); + CLEO_SkipUnusedVarArgs(thread); return OR_CONTINUE; } diff --git a/cleo_plugins/DebugUtils/Utils.h b/cleo_plugins/DebugUtils/Utils.h index a094906d..58c9ade0 100644 --- a/cleo_plugins/DebugUtils/Utils.h +++ b/cleo_plugins/DebugUtils/Utils.h @@ -7,19 +7,10 @@ static std::string GetConfigFilename() { std::string configFile = CFileMgr::ms_rootDirName; - if (!configFile.empty()) configFile += "\\"; + if (!configFile.empty() && configFile.back() != '\\') configFile.push_back('\\'); configFile += "cleo\\cleo_plugins\\DebugUtils.ini"; return configFile; } -// var-args opcodes -static void SkipUnusedParams(CScriptThread* thread) -{ - while (CLEO_GetOperandType(thread) != DT_END) - CLEO_SkipOpcodeParams(thread, 1); // skip param - - thread->ReadDataByte(); // skip terminator -} - diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index 5346e39c..26481048 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -128,6 +128,8 @@ enum class eCallbackId : DWORD ScmInit3, // void WINAPI OnScmInit3(); ScriptsLoaded, // void WINAPI OnScriptsLoaded(); ScriptsFinalize, // void WINAPI OnScriptsFinalize(); + ScriptRegister, // void WINAPI OnScriptRegister(CRunningScript* pScript); // called after script creation + ScriptUnregister, // void WINAPI OnScriptUnregister(CRunningScript* pScript); // called before script deletion ScriptProcess, // bool WINAPI OnScriptProcess(CRunningScript* pScript); // return false to skip this script processing ScriptDraw, // void WINAPI OnScriptDraw(bool beforeFade); DrawingFinished, // void WINAPI OnDrawingFinished(); // called after game rendered everything and before presenting screen buffer @@ -165,7 +167,7 @@ struct CRunningScript BYTE* CurrentIP; // 0x14 current instruction pointer BYTE* Stack[8]; // 0x18 return stack for 0050, 0051 WORD SP; // 0x38 current item in stack - BYTE _pad3A[2]; // 0x3A padding + BYTE _pad3A[2]; // 0x3A padding SCRIPT_VAR LocalVar[32]; // 0x3C script's local variables DWORD Timers[2]; // 0xBC script's 
timers bool bIsActive; // 0xC4 is script active @@ -174,17 +176,17 @@ struct CRunningScript bool bIsExternal; // 0xC7 is thread external (from script.img) bool bTextBlockOverride; // 0xC8 BYTE bExternalType; // 0xC9 - BYTE _padCA[2]; // 0xCA padding + BYTE _padCA[2]; // 0xCA padding DWORD WakeTime; // 0xCC time, when script starts again after 0001 opcode eLogicalOperation LogicalOp;// 0xD0 opcode 00D6 parameter bool NotFlag; // 0xD2 opcode & 0x8000 != 0 bool bWastedBustedCheck; // 0xD3 wasted_or_busted check flag bool bWastedOrBusted; // 0xD4 is player wasted or busted - char _padD5[3]; // 0xD5 padding + char _padD5[3]; // 0xD5 padding void* SceneSkipIP; // 0xD8 scene skip label ptr bool bIsMission; // 0xDC is this script mission - WORD ScmFunction; // 0xDD CLEO's previous scmFunction id - bool bIsCustom; // 0xDF is this CLEO script + WORD ScmFunction; // 0xDD CLEO's previous scmFunction id + bool bIsCustom; // 0xDF is this CLEO script #ifdef __cplusplus public: @@ -266,15 +268,15 @@ static_assert(sizeof(CRunningScript) == 0xE0, "Invalid size of CRunningScript!") // alias for legacy use #ifdef __cplusplus - typedef class CRunningScript CScriptThread; + typedef class CRunningScript CScriptThread; #else - typedef struct CRunningScript CScriptThread; + typedef struct CRunningScript CScriptThread; #endif enum OpcodeResult : char { - OR_CONTINUE = 0, - OR_INTERRUPT = 1, + OR_CONTINUE = 0, + OR_INTERRUPT = 1, OR_ERROR = -1, }; @@ -286,62 +288,62 @@ extern "C" { #endif //__cplusplus DWORD WINAPI CLEO_GetVersion(); -eCLEO_Version WINAPI CLEO_GetScriptVersion(const CRunningScript* thread); eGameVersion WINAPI CLEO_GetGameVersion(); BOOL WINAPI CLEO_RegisterOpcode(WORD opcode, _pOpcodeHandler callback); void WINAPI CLEO_RegisterCallback(eCallbackId id, void* func); -DWORD WINAPI CLEO_GetIntOpcodeParam(CRunningScript* thread); -float WINAPI CLEO_GetFloatOpcodeParam(CRunningScript* thread); - -void WINAPI CLEO_SetIntOpcodeParam(CRunningScript* thread, DWORD value); -void 
WINAPI CLEO_SetFloatOpcodeParam(CRunningScript* thread, float value); - -LPSTR WINAPI CLEO_ReadStringOpcodeParam(CRunningScript* thread, char* buf = nullptr, int size = 0); -LPSTR WINAPI CLEO_ReadStringPointerOpcodeParam(CRunningScript* thread, char* buf = nullptr, int size = 0); // exactly same as CLEO_ReadStringOpcodeParam -void WINAPI CLEO_WriteStringOpcodeParam(CRunningScript* thread, const char* str); -char* WINAPI CLEO_ReadParamsFormatted(CRunningScript* thread, const char* format, char* buf = nullptr, int size = 0); +// script utils +void WINAPI CLEO_GetScriptInfoStr(CRunningScript* thread, bool currLineInfo, char* buf, DWORD bufSize); // short text for displaying in error\log messages +eCLEO_Version WINAPI CLEO_GetScriptVersion(const CRunningScript* thread); // compatibility mode void WINAPI CLEO_SetThreadCondResult(CRunningScript* thread, BOOL result); - -void WINAPI CLEO_SkipOpcodeParams(CRunningScript* thread, int count); - void WINAPI CLEO_ThreadJumpAtLabelPtr(CRunningScript* thread, int labelPtr); -eDataType WINAPI CLEO_GetOperandType(const CRunningScript* thread); // peep data type - -extern SCRIPT_VAR *opcodeParams; +eDataType WINAPI CLEO_GetOperandType(const CRunningScript* thread); // peek parameter data type +DWORD WINAPI CLEO_GetVarArgCount(CRunningScript* thread); // peek var-args count -extern SCRIPT_VAR *missionLocals; +extern SCRIPT_VAR* opcodeParams; +extern SCRIPT_VAR* missionLocals; -//intermediate data is stored in opcodeParams array -void WINAPI CLEO_RetrieveOpcodeParams(CRunningScript *thread, int count); -void WINAPI CLEO_RecordOpcodeParams(CRunningScript *thread, int count); +// param read +SCRIPT_VAR* WINAPI CLEO_GetPointerToScriptVariable(CRunningScript* thread); // get pointer to the variable data. Advances script to next param +void WINAPI CLEO_RetrieveOpcodeParams(CRunningScript* thread, int count); // read multiple params. 
Stored in opcodeParams array +DWORD WINAPI CLEO_GetIntOpcodeParam(CRunningScript* thread); +float WINAPI CLEO_GetFloatOpcodeParam(CRunningScript* thread); +LPSTR WINAPI CLEO_ReadStringOpcodeParam(CRunningScript* thread, char* buf = nullptr, int size = 0); +LPSTR WINAPI CLEO_ReadStringPointerOpcodeParam(CRunningScript* thread, char* buf = nullptr, int size = 0); // exactly same as CLEO_ReadStringOpcodeParam +char* WINAPI CLEO_ReadParamsFormatted(CRunningScript* thread, const char* format, char* buf = nullptr, int size = 0); // consumes all var-arg params and terminator -SCRIPT_VAR * WINAPI CLEO_GetPointerToScriptVariable(CRunningScript *thread); +// param skip without reading +void WINAPI CLEO_SkipOpcodeParams(CRunningScript* thread, int count); +void WINAPI CLEO_SkipUnusedVarArgs(CRunningScript* thread); // for var-args opcodes. Should be called even when all params were read (to skip var-arg terminator) -DWORD WINAPI CLEO_GetScriptTextureById(CRunningScript* thread, int id); // ret RwTexture * +// param write +void WINAPI CLEO_RecordOpcodeParams(CRunningScript* thread, int count); // write multiple params from opcodeParams array +void WINAPI CLEO_SetIntOpcodeParam(CRunningScript* thread, DWORD value); +void WINAPI CLEO_SetFloatOpcodeParam(CRunningScript* thread, float value); +void WINAPI CLEO_WriteStringOpcodeParam(CRunningScript* thread, const char* str); -HSTREAM WINAPI CLEO_GetInternalAudioStream(CRunningScript* thread, DWORD stream); // arg CAudioStream * -CRunningScript* WINAPI CLEO_CreateCustomScript(CRunningScript* fromThread, const char *script_name, int label); +BOOL WINAPI CLEO_GetScriptDebugMode(const CRunningScript* thread); // debug mode features enabled for this script? 
+void WINAPI CLEO_SetScriptDebugMode(CRunningScript* thread, BOOL enabled); +CRunningScript* WINAPI CLEO_CreateCustomScript(CRunningScript* fromThread, const char* script_name, int label); CRunningScript* WINAPI CLEO_GetLastCreatedCustomScript(); +// scripts deletion callback void WINAPI CLEO_AddScriptDeleteDelegate(FuncScriptDeleteDelegateT func); - void WINAPI CLEO_RemoveScriptDeleteDelegate(FuncScriptDeleteDelegateT func); -void WINAPI CLEO_ResolvePath(CRunningScript* thread, char* inOutPath, DWORD pathMaxLen); // convert to absolute (file system) path +DWORD WINAPI CLEO_GetScriptTextureById(CRunningScript* thread, int id); // ret RwTexture * -BOOL WINAPI CLEO_GetScriptDebugMode(const CRunningScript* thread); // debug mode features enabled for this script? -void WINAPI CLEO_SetScriptDebugMode(CRunningScript* thread, BOOL enabled); +HSTREAM WINAPI CLEO_GetInternalAudioStream(CRunningScript* thread, DWORD stream); // arg CAudioStream * -void WINAPI CLEO_Log(eLogLevel level, const char* msg); // add message to log +void WINAPI CLEO_ResolvePath(CRunningScript* thread, char* inOutPath, DWORD pathMaxLen); // convert to absolute (file system) path -void WINAPI CLEO_GetScriptInfoStr(CRunningScript* thread, bool currLineInfo, char* buf, DWORD bufSize); // short text for displaying in error\log messages +void WINAPI CLEO_Log(eLogLevel level, const char* msg); // add message to log #ifdef __cplusplus } diff --git a/cleo_sdk/CLEO.lib b/cleo_sdk/CLEO.lib index 25791ef2cfc7d77359a137a1b2bf7606095e5f5c..70f64d4d1ee757c868cc6b964f167c7650369c9e 100644 GIT binary patch delta 1521 zcmb7^PiRzE6vn?7#>~7-{)~AxGns#H{*Ne36Ad95Vwz&BjaoGX1s9!3TN53MNgNXq zT8IXPtTgrrrLKw#OTljP1Sw6aSP?g(2D<1%3tgyWp3S_ndPWhvROL>D1x0fkvSG5q4PHW1vW-n zYit7B*dX!E*u*$0q8Dw#(`=liHf8un%a}s1RMS?{XCRbtRq|Nxq zI;X{IJ2{j~Ug@xbP>$wfNXa~<&vv?s-yd*-O}{iq5-&CfF3V! 
ztQB}na(>`O0C*Y(vJqfIlxG+NigDl($t8d-NnoB_;4%3qG)tDq&|07~2+T3>d;A7l+%rwy7FbPerUX+HB}Usou1vs8w_rp^<2~C%a9R&GQIB~9td|Fm5VR+BTZ(n zyzm;yliz!9@esN5Rk7u3A$J42WW;~e-Ei*TMWxE4mU~XW7-m+^c#X*XsdsNH1zMq8 z@ztI5m_3pW9M=bO-~9a!gA(#v;FJ2F+xJ|hqi6-{@p@YQ!Jjnu*e;f#hwUN^-1--Im@pS-WO7zPgxSvyZ!p_>DRK|$~!bAQ1E4}Q~#R|B6s{79a+--o%=8|zbz$t#yi zgQp$?Fb$XsKx!38Zvolk(8wY%&|{=rHB?|upg140XMun-5ptVApv2MxD~y4YP`Ta! z%5^#nc{O05!$_5}zt$Nkw;3vS*^u&H$eIBH+2xS6!!u=0Lr#kWky{MqH<(9uW|uE2 zaH@emV_!O%n*UDm&dPDzkgLOS86P%eNj2m^*W_E$kY3W1_vQuJH+4xw&q!ajBpG+) zdDMFvooq~2|J^Dl#DWQ|rhu(9aGV7eM}Qtxb3m*B%yX<+8<=o_rpts9utFzPF9U~E zW1$X>Re*KcWNw#vULKgSfMwR*VLZ+7idrmGWT6dy+-2bvHtEx}e2!(x7FEMcTIip@L8}99C_*9 zQM-5VeH`V<_07_eqPKru-Vc(v9%RqV`3Y}IQ6IJ6{^s&1_;Z!lioD3BqP( diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 00235474..25aa9c26 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -820,7 +820,7 @@ namespace CLEO { { LOG_WARNING("Read formatted string: Found more params than format slots in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); } - SkipUnusedParameters(thread); // skip terminator too + SkipUnusedVarArgs(thread); // skip terminator too outputStr[written] = '\0'; return (int)written; @@ -1007,12 +1007,27 @@ namespace CLEO { thread->SetIp(off < 0 ? 
thread->GetBasePointer() - off : scmBlock + off); } - void SkipUnusedParameters(CRunningScript *thread) + void SkipUnusedVarArgs(CRunningScript *thread) { - while (CLEO_GetOperandType(thread) != DT_END) - GetScriptParams(thread, 1); // skip param + while (CLEO_GetOperandType(thread) != DT_END) + CLEO_SkipOpcodeParams(thread, 1); - thread->ReadDataByte(); // skip terminator + thread->IncPtr(); // skip terminator + } + + DWORD GetVarArgCount(CRunningScript* thread) + { + const auto ip = thread->GetBytePointer(); + + DWORD count = 0; + while (CLEO_GetOperandType(thread) != DT_END) + { + CLEO_SkipOpcodeParams(thread, 1); + count++; + } + + thread->SetIp(ip); // restore + return count; } struct ScmFunction @@ -1249,7 +1264,7 @@ namespace CLEO { else { if (cs) delete cs; - SkipUnusedParameters(thread); + SkipUnusedVarArgs(thread); LOG_WARNING("Failed to load script '%s' in script ", filename.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); } @@ -1291,7 +1306,7 @@ namespace CLEO { else { if (cs) delete cs; - SkipUnusedParameters(thread); + SkipUnusedVarArgs(thread); LOG_WARNING("[0A94] Failed to load mission '%s' from script '%s'.", filename.c_str(), thread->GetName()); } @@ -1566,7 +1581,7 @@ namespace CLEO { add esp, stackAlign } - SkipUnusedParameters(thread); + SkipUnusedVarArgs(thread); return OR_CONTINUE; } @@ -1626,7 +1641,7 @@ namespace CLEO { add esp, stackAlign } - SkipUnusedParameters(thread); + SkipUnusedVarArgs(thread); return OR_CONTINUE; } @@ -1689,7 +1704,7 @@ namespace CLEO { } *thread << result; - SkipUnusedParameters(thread); + SkipUnusedVarArgs(thread); return OR_CONTINUE; } @@ -1754,7 +1769,7 @@ namespace CLEO { } *thread << result; - SkipUnusedParameters(thread); + SkipUnusedVarArgs(thread); return OR_CONTINUE; } @@ -1964,7 +1979,7 @@ namespace CLEO { if (nParams > 32) GetScriptParams(thread, nParams - 32); - // all areguments read + // all arguments read scmFunc->retnAddress = thread->GetBytePointer(); // pass arguments as new scope local 
variables @@ -1990,18 +2005,24 @@ namespace CLEO { { ScmFunction *scmFunc = ScmFunction::Store[reinterpret_cast(thread)->GetScmFunction()]; - DWORD nRetParams = 0; - if (*thread->GetBytePointer()) *thread >> nRetParams; + DWORD returnParamCount = 0; + if (*thread->GetBytePointer()) *thread >> returnParamCount; + if (returnParamCount) GetScriptParams(thread, returnParamCount); - if (nRetParams) GetScriptParams(thread, nRetParams); - scmFunc->Return(thread); - if (nRetParams) SetScriptParams(thread, nRetParams); - SkipUnusedParameters(thread); + scmFunc->Return(thread); // jump back to cleo_call, right after last input param. Return slot var args starts here + if (scmFunc->moduleExportRef != nullptr) GetInstance().ModuleSystem.ReleaseModuleRef((char*)scmFunc->moduleExportRef); // export - release module + delete scmFunc; - if(scmFunc->moduleExportRef != nullptr) - GetInstance().ModuleSystem.ReleaseModuleRef((char*)scmFunc->moduleExportRef); // exiting export - release module + DWORD returnSlotCount = GetVarArgCount(thread); + if (returnSlotCount > returnParamCount) + { + SHOW_ERROR("Opcode [0AB2] returned fewer params than expected by function caller in script %s\nScript suspended.", ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } + + if (returnSlotCount) SetScriptParams(thread, returnSlotCount); + thread->IncPtr(); // skip var args terminator - delete scmFunc; return OR_CONTINUE; } @@ -2913,7 +2934,7 @@ extern "C" if(format != nullptr && strlen(format) > 0) ReadFormattedString(thread, buf, size, format); else - SkipUnusedParameters(thread); + SkipUnusedVarArgs(thread); return buf; } @@ -2923,6 +2944,11 @@ extern "C" SetScriptCondResult(thread, result != FALSE); } + DWORD WINAPI CLEO_GetVarArgCount(CLEO::CRunningScript* thread) + { + return GetVarArgCount(thread); + } + void WINAPI CLEO_SkipOpcodeParams(CLEO::CRunningScript* thread, int count) { for (int i = 0; i < count; i++) @@ -2965,6 +2991,11 @@ 
extern "C" } } + void WINAPI CLEO_SkipUnusedVarArgs(CLEO::CRunningScript* thread) + { + SkipUnusedVarArgs(thread); + } + void WINAPI CLEO_ThreadJumpAtLabelPtr(CLEO::CRunningScript* thread, int labelPtr) { ThreadJump(thread, labelPtr); @@ -3033,7 +3064,7 @@ extern "C" else { if (cs) delete cs; - if (fromThread) SkipUnusedParameters(fromThread); + if (fromThread) SkipUnusedVarArgs(fromThread); LOG_WARNING("Failed to load script '%s'.", script_name); return nullptr; } diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index 24adbc24..8af6f1d3 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -65,5 +65,6 @@ namespace CLEO char* ReadStringParam(CRunningScript* thread, char* buf = nullptr, DWORD bufSize = 0); bool WriteStringParam(CRunningScript* thread, const char* str); int ReadFormattedString(CRunningScript* thread, char* buf, DWORD bufSize, const char* format); - void SkipUnusedParameters(CRunningScript* thread); // for var-args opcodes + void SkipUnusedVarArgs(CRunningScript* thread); // for var-args opcodes + DWORD GetVarArgCount(CRunningScript* thread); // for var-args opcodes } diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 00656da8..2161ff7a 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -1154,10 +1154,24 @@ namespace CLEO } AddScriptToQueue(cs, activeThreadQueue); cs->SetActive(true); + + // run registered callbacks + for (void* func : GetInstance().GetCallbacks(eCallbackId::ScriptRegister)) + { + typedef void WINAPI callback(CCustomScript*); + ((callback*)func)(cs); + } } void CScriptEngine::RemoveCustomScript(CCustomScript *cs) { + // run registered callbacks + for (void* func : GetInstance().GetCallbacks(eCallbackId::ScriptUnregister)) + { + typedef void WINAPI callback(CCustomScript*); + ((callback*)func)(cs); + } + if (cs->parentThread) { cs->BaseIP = 0; // don't delete BaseIP if child thread diff --git a/source/cleo.def b/source/cleo.def index 
394aad2f..c9dffce9 100644 --- a/source/cleo.def +++ b/source/cleo.def @@ -27,11 +27,13 @@ EXPORTS _CLEO_GetLastCreatedCustomScript@0 @24 _CLEO_AddScriptDeleteDelegate@4 @25 _CLEO_RemoveScriptDeleteDelegate@4 @26 - _CLEO_ResolvePath@12 @27 - _CLEO_GetScriptVersion@4 @28 - _CLEO_RegisterCallback@8 @29 - _CLEO_GetScriptDebugMode@4 @30 - _CLEO_SetScriptDebugMode@8 @31 - _CLEO_Log@8 @32 - _CLEO_ReadParamsFormatted@16 @33 - + _CLEO_RegisterCallback@8 @27 + _CLEO_GetVarArgCount@4 @28 + _CLEO_SkipUnusedVarArgs@4 @29 + _CLEO_ReadParamsFormatted@16 @30 + _CLEO_GetScriptVersion@4 @31 + _CLEO_GetScriptInfoStr@16 @32 + _CLEO_ResolvePath@12 @33 + _CLEO_GetScriptDebugMode@4 @34 + _CLEO_SetScriptDebugMode@8 @35 + _CLEO_Log@8 @36 From ade954d9c641a47c3c61c9ada49f0ab231a41459 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sun, 22 Oct 2023 17:16:02 +0200 Subject: [PATCH 033/216] script parameter read write type validation (#123) --- source/CCustomOpcodeSystem.cpp | 114 ++++++++++++++++++++++++++++++++- 1 file changed, 111 insertions(+), 3 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 25aa9c26..d9e7823a 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -429,6 +429,23 @@ namespace CLEO { inline CRunningScript& operator>>(CRunningScript& thread, DWORD& uval) { + auto paramType = (eDataType)*thread.GetBytePointer(); + switch(paramType) + { + // integers + case DT_BYTE: + case DT_WORD: + case DT_DWORD: + case DT_LVAR: + case DT_LVAR_ARRAY: + case DT_VAR: + case DT_VAR_ARRAY: + break; + + default: + LOG_WARNING("Reading integer from invalid argument type (%02X) in script %s", paramType, ((CCustomScript*)&thread)->GetInfoStr().c_str()); + } + GetScriptParams(&thread, 1); uval = opcodeParams[0].dwParam; return thread; @@ -436,6 +453,23 @@ namespace CLEO { inline CRunningScript& operator<<(CRunningScript& thread, DWORD uval) { + auto paramType = (eDataType)*thread.GetBytePointer(); + switch (paramType) + { + // 
integers + /*case DT_BYTE: + case DT_WORD: + case DT_DWORD:*/ + case DT_LVAR: + case DT_LVAR_ARRAY: + case DT_VAR: + case DT_VAR_ARRAY: + break; + + default: + LOG_WARNING("Writing integer into invalid argument type (%02X) in script %s", paramType, ((CCustomScript*)&thread)->GetInfoStr().c_str()); + } + opcodeParams[0].dwParam = uval; SetScriptParams(&thread, 1); return thread; @@ -443,6 +477,23 @@ namespace CLEO { inline CRunningScript& operator>>(CRunningScript& thread, int& nval) { + auto paramType = (eDataType)*thread.GetBytePointer(); + switch (paramType) + { + // integers + case DT_BYTE: + case DT_WORD: + case DT_DWORD: + case DT_LVAR: + case DT_LVAR_ARRAY: + case DT_VAR: + case DT_VAR_ARRAY: + break; + + default: + LOG_WARNING("Reading integer from invalid argument type (%02X) in script %s", paramType, ((CCustomScript*)&thread)->GetInfoStr().c_str()); + } + GetScriptParams(&thread, 1); nval = opcodeParams[0].nParam; return thread; @@ -450,6 +501,23 @@ namespace CLEO { inline CRunningScript& operator<<(CRunningScript& thread, int nval) { + auto paramType = (eDataType)*thread.GetBytePointer(); + switch (paramType) + { + // integers + /*case DT_BYTE: + case DT_WORD: + case DT_DWORD:*/ + case DT_LVAR: + case DT_LVAR_ARRAY: + case DT_VAR: + case DT_VAR_ARRAY: + break; + + default: + LOG_WARNING("Writing integer into invalid argument type (%02X) in script %s", paramType, ((CCustomScript*)&thread)->GetInfoStr().c_str()); + } + opcodeParams[0].nParam = nval; SetScriptParams(&thread, 1); return thread; @@ -457,6 +525,20 @@ namespace CLEO { inline CRunningScript& operator>>(CRunningScript& thread, float& fval) { + auto paramType = (eDataType)*thread.GetBytePointer(); + switch (paramType) + { + case DT_FLOAT: + case DT_LVAR: + case DT_LVAR_ARRAY: + case DT_VAR: + case DT_VAR_ARRAY: + break; + + default: + LOG_WARNING("Reading float from invalid argument type (%02X) in script %s", paramType, ((CCustomScript*)&thread)->GetInfoStr().c_str()); + } + 
GetScriptParams(&thread, 1); fval = opcodeParams[0].fParam; return thread; @@ -464,6 +546,19 @@ namespace CLEO { inline CRunningScript& operator<<(CRunningScript& thread, float fval) { + auto paramType = (eDataType)*thread.GetBytePointer(); + switch (paramType) + { + case DT_LVAR: + case DT_LVAR_ARRAY: + case DT_VAR: + case DT_VAR_ARRAY: + break; + + default: + LOG_WARNING("Writing float into invalid argument type (%02X) in script %s", paramType, ((CCustomScript*)&thread)->GetInfoStr().c_str()); + } + opcodeParams[0].fParam = fval; SetScriptParams(&thread, 1); return thread; @@ -1663,7 +1758,6 @@ namespace CLEO { { switch (*thread->GetBytePointer()) { - case DT_FLOAT: case DT_DWORD: case DT_WORD: case DT_BYTE: @@ -1673,12 +1767,18 @@ namespace CLEO { case DT_LVAR_ARRAY: *thread >> arg->dwParam; break; + + case DT_FLOAT: + *thread >> arg->fParam; + break; + case DT_VAR_STRING: case DT_LVAR_STRING: case DT_VAR_TEXTLABEL: case DT_LVAR_TEXTLABEL: arg->pParam = GetScriptParamPointer(thread); break; + case DT_VARLEN_STRING: case DT_TEXTLABEL: arg->pcParam = ReadStringParam(thread, textParams[currTextParam++], MAX_STR_LEN); @@ -1728,7 +1828,6 @@ namespace CLEO { { switch (*thread->GetBytePointer()) { - case DT_FLOAT: case DT_DWORD: case DT_WORD: case DT_BYTE: @@ -1738,12 +1837,18 @@ namespace CLEO { case DT_LVAR_ARRAY: *thread >> arg->dwParam; break; + + case DT_FLOAT: + *thread >> arg->fParam; + break; + case DT_VAR_STRING: case DT_LVAR_STRING: case DT_VAR_TEXTLABEL: case DT_LVAR_TEXTLABEL: arg->pParam = GetScriptParamPointer(thread); break; + case DT_VARLEN_STRING: case DT_TEXTLABEL: arg->pcParam = ReadStringParam(thread, textParams[currTextParam++], MAX_STR_LEN); @@ -1943,7 +2048,6 @@ namespace CLEO { switch (*thread->GetBytePointer()) { - case DT_FLOAT: case DT_DWORD: case DT_WORD: case DT_BYTE: @@ -1954,6 +2058,10 @@ namespace CLEO { *thread >> arg->dwParam; break; + case DT_FLOAT: + *thread >> arg->fParam; + break; + case DT_VAR_STRING: case DT_LVAR_STRING: case 
DT_VAR_TEXTLABEL: From e42a23bce0b57302e3aa80cff4306714da1c0ff0 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sun, 22 Oct 2023 18:44:39 +0200 Subject: [PATCH 034/216] Fixed renamed function calls. (#126) --- source/CCustomOpcodeSystem.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index d9e7823a..bcfa96b2 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -757,7 +757,7 @@ namespace CLEO { // invalid input arguments if(outputStr == nullptr || len == 0) { - SkipUnusedParameters(thread); + SkipUnusedVarArgs(thread); return -1; } @@ -905,7 +905,7 @@ namespace CLEO { { _ReadFormattedString_OutOfMemory: // jump here on error LOG_WARNING("Read formatted string error: Insufficient output buffer size in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); - SkipUnusedParameters(thread); + SkipUnusedVarArgs(thread); outputStr[len - 1] = '\0'; return -1; } From cd3d9a290d1ecff37b3a118b3def575948ec885b Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sun, 22 Oct 2023 18:45:29 +0200 Subject: [PATCH 035/216] Grouping repeated messages in screen log. (#122) * Grouping repeated messages in screen log. * Enabled last opcode trace to log. 
--- cleo_plugins/DebugUtils/ScreenLog.cpp | 32 +++++++---- cleo_plugins/DebugUtils/ScreenLog.h | 81 +++++++++++++++++++++------ source/CCustomOpcodeSystem.cpp | 16 +++--- source/CCustomOpcodeSystem.h | 8 ++- source/CScriptEngine.cpp | 13 ++++- 5 files changed, 112 insertions(+), 38 deletions(-) diff --git a/cleo_plugins/DebugUtils/ScreenLog.cpp b/cleo_plugins/DebugUtils/ScreenLog.cpp index 1ea6c538..b13c09bf 100644 --- a/cleo_plugins/DebugUtils/ScreenLog.cpp +++ b/cleo_plugins/DebugUtils/ScreenLog.cpp @@ -3,10 +3,11 @@ #include "CFont.h" #include "CTimer.h" +DWORD ScreenLog::timeDisplay = 1000; + ScreenLog::ScreenLog() { scrollOffset = 0.0f; - Init(); } @@ -29,17 +30,27 @@ void ScreenLog::Add(eLogLevel level, const char* msg) return; } - entries.emplace_front(level, msg, timeDisplay); - - if (entries.size() > maxMessages) + Entry entry(level, msg); + if(!entries.empty() && entries.front() == entry) { - entries.resize(maxMessages); + entries.front().Repeat(); // duplicated } + else + { + entries.push_front(std::move(entry)); - // update scroll pos - float sizeY = fontSize * RsGlobal.maximumHeight / 448.0f; - size_t lines = CountLines(std::string(msg)); - scrollOffset += 18.0f * lines * sizeY; + bool full = entries.size() >= maxMessages; + if (full) entries.resize(maxMessages); + + // update scroll pos + float sizeY = fontSize * RsGlobal.maximumHeight / 448.0f; + size_t lines = CountLines(std::string(msg)); + + if(!full) + scrollOffset += 18.0f * lines * sizeY; + else + scrollOffset = 0.0f; // do not animate if list was full + } } void ScreenLog::Draw() @@ -135,7 +146,8 @@ void ScreenLog::Draw() lines -= CountLines(entry.msg); float y = posY + 18.0f * sizeY * lines; - CFont::PrintString(posX, y, entry.msg.c_str()); + + CFont::PrintString(posX, y, entry.GetMsg()); } // for some reason last string on print list is always drawn incorrectly diff --git a/cleo_plugins/DebugUtils/ScreenLog.h b/cleo_plugins/DebugUtils/ScreenLog.h index e69f9487..149e9e7b 100644 --- 
a/cleo_plugins/DebugUtils/ScreenLog.h +++ b/cleo_plugins/DebugUtils/ScreenLog.h @@ -9,6 +9,8 @@ using namespace CLEO; class ScreenLog { public: + static DWORD timeDisplay; // miliseconds + ScreenLog(); void Init(); @@ -20,8 +22,7 @@ class ScreenLog eLogLevel level; size_t maxMessages; float fontSize; - DWORD timeDisplay; - DWORD timeFadeout; + DWORD timeFadeout; // miliseconds const CRGBA fontColor[4] = { // colors for eLogLevel CRGBA(0xDD, 0xDD, 0xDD, 0xF0), // None @@ -34,40 +35,84 @@ class ScreenLog { eLogLevel level; std::string msg; + size_t msgStartPos; float timeLeft; + size_t repeats; + + static const size_t Repeat_Prefix_Len = 16; // extra characters for repeat count text Entry() : level(eLogLevel::Default), msg(""), - timeLeft(0.0f) + timeLeft(0.0f), + repeats(1) { } - Entry(eLogLevel level, const char* msg, DWORD durationMs) : - level(level) + Entry(eLogLevel level, const char* msg) : + level(level), + repeats(1) { if(msg != nullptr) { - timeLeft = min(strlen(msg), 200) * 0.08f; // 12 letters peer second reading speed - timeLeft = max(timeLeft, 0.001f * durationMs); - auto len = strlen(msg); - this->msg.reserve(len); + this->msg.reserve(Repeat_Prefix_Len + len); + // repeat prefix + this->msg.resize(Repeat_Prefix_Len - 2); + this->msg.push_back(':'); + this->msg.push_back(' '); + msgStartPos = Repeat_Prefix_Len; // prefix not present + + // copy input message for(size_t i = 0; i < len; i++) { - char c = msg[i]; + const char c = msg[i]; + switch(c) + { + case '\n': + this->msg += "~n~"; + break; - if(c == '\n') - this->msg += "~n~"; - else - this->msg.push_back(c); + // characters not represented correctly by game's font texture + case '{': + case '}': + this->msg.push_back('_'); + break; + + default: + this->msg.push_back(c); + } } } - else - { - timeLeft = 0.0f; - } + + ResetTime(); + } + + void Repeat() + { + ResetTime(); + repeats++; + + std::string prefix = "x" + std::to_string(repeats); + msgStartPos = Repeat_Prefix_Len - 2 - prefix.length(); // and 
": " + msg.replace(msgStartPos, prefix.length(), prefix); + } + + void ResetTime() + { + timeLeft = min(msg.length(), 200) * 0.08f; // 12 letters peer second reading speed + timeLeft = max(timeLeft, 0.001f * ScreenLog::timeDisplay); // not shorter than defined in config + } + + const char* GetMsg(bool prefix = true) const + { + return msg.c_str() + (prefix ? msgStartPos : Repeat_Prefix_Len); + } + + bool operator==(const Entry& other) const + { + return level == other.level && !strcmp(GetMsg(false), other.GetMsg(false)); } }; diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index bcfa96b2..6a72ef10 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -157,11 +157,10 @@ namespace CLEO { void(__cdecl * SpawnCar)(DWORD); - WORD last_opcode = 0; - WORD last_custom_opcode = 0; - char last_thread[8] = "none"; - CRunningScript * last_script; - ptrdiff_t last_off = -1; + CRunningScript* CCustomOpcodeSystem::lastScript = nullptr; + WORD CCustomOpcodeSystem::lastOpcode = 0; + WORD* CCustomOpcodeSystem::lastOpcodePtr = nullptr; + WORD CCustomOpcodeSystem::lastCustomOpcode = 0; // opcode handler for custom opcodes OpcodeResult __fastcall CCustomOpcodeSystem::customOpcodeHandler(CRunningScript *thread, int dummy, WORD opcode) @@ -170,6 +169,10 @@ namespace CLEO { ss << thread->GetName() << " opcode " << opcodeToStr(opcode) << std::endl; OutputDebugStringA(ss.str().c_str());//*/ + lastScript = thread; + lastOpcode = opcode; + lastOpcodePtr = (WORD*)thread->GetBytePointer() - 1; // rewind to the opcode start + if(opcode > LastCustomOpcode) { SHOW_ERROR("Opcode [%04X] out of supported range! 
\nCalled in script %s\nScript suspended.", opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); @@ -179,8 +182,7 @@ namespace CLEO { CustomOpcodeHandler handler = customOpcodeProc[opcode]; if(handler != nullptr) { - last_custom_opcode = opcode; - last_script = thread; + lastCustomOpcode = opcode; return handler(thread); } diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index 8af6f1d3..24a8c691 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -22,13 +22,19 @@ namespace CLEO static const size_t LastOriginalOpcode = 0x0A4E; // GTA SA static const size_t LastCustomOpcode = 0x7FFF; + // most recently processed + static CRunningScript* lastScript; + static WORD lastOpcode; + static WORD* lastOpcodePtr; + static WORD lastCustomOpcode; + void FinalizeScriptObjects(); CCustomOpcodeSystem(); virtual void Inject(CCodeInjector& inj); ~CCustomOpcodeSystem() { - //TRACE("Last opcode executed %04X at %s:%d", last_opcode, last_thread, last_off); + TRACE("Last opcode executed %04X", lastOpcode); } static bool RegisterOpcode(WORD opcode, CustomOpcodeHandler callback); diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 2161ff7a..b02e2c4b 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -734,11 +734,20 @@ namespace CLEO if(false) { // TODO: get Sanny's SMC extra info + ss << "line " << 0; + ss << " - "; + ss << "CODE"; } else { - auto address = (DWORD)CurrentIP - (DWORD)BaseIP; - ss << "0x" << std::hex << std::uppercase << /*std::setw(4) << std::setfill('0') <<*/ address; + auto address = (DWORD)BaseIP; + if (address == 0) address = GetInstance().VersionManager.TranslateMemoryAddress(MA_SCM_BLOCK); + //address = (DWORD)CurrentIP - address; // processed position + address = (DWORD)CCustomOpcodeSystem::lastOpcodePtr - address; // opcode position + + ss << "offset {" << address << "}"; // Sanny offsets style + ss << " - "; + ss << std::hex << std::uppercase << std::setw(4) << 
std::setfill('0') << CCustomOpcodeSystem::lastOpcode << ": ..."; } } From cc82d9d1e608e7101db0cf0268cabfa74b8aef40 Mon Sep 17 00:00:00 2001 From: Miran Date: Mon, 23 Oct 2023 02:28:52 +0200 Subject: [PATCH 036/216] Changed CLEO version references from 4 to 5. Configured game debugging settings in all projects. --- CLEO4.sln => CLEO5.sln | 2 +- CLEO4.vcxproj => CLEO5.vcxproj | 399 +++++++++--------- ...4.vcxproj.filters => CLEO5.vcxproj.filters | 2 +- README.md | 9 +- cleo_plugins/DebugUtils/DebugUtils.vcxproj | 6 + .../FileSystemOperations.vcxproj | 6 + cleo_plugins/IniFiles/IniFiles.vcxproj | 6 + .../IntOperations/IntOperations.vcxproj | 6 + cleo_sdk/CLEO.h | 8 +- source/{CLEO4.rc => CLEO5.rc} | Bin 10 files changed, 238 insertions(+), 206 deletions(-) rename CLEO4.sln => CLEO5.sln (88%) rename CLEO4.vcxproj => CLEO5.vcxproj (94%) rename CLEO4.vcxproj.filters => CLEO5.vcxproj.filters (99%) rename source/{CLEO4.rc => CLEO5.rc} (100%) diff --git a/CLEO4.sln b/CLEO5.sln similarity index 88% rename from CLEO4.sln rename to CLEO5.sln index fc5c5f87..00b71c93 100644 --- a/CLEO4.sln +++ b/CLEO5.sln @@ -3,7 +3,7 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio Version 17 VisualStudioVersion = 17.4.33213.308 MinimumVisualStudioVersion = 10.0.40219.1 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CLEO4", "CLEO4.vcxproj", "{B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CLEO5", "CLEO5.vcxproj", "{B212DDA4-2A8E-45B2-914D-7BEEB31D06B1}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution diff --git a/CLEO4.vcxproj b/CLEO5.vcxproj similarity index 94% rename from CLEO4.vcxproj rename to CLEO5.vcxproj index 83e30b86..5191e703 100644 --- a/CLEO4.vcxproj +++ b/CLEO5.vcxproj @@ -1,197 +1,204 @@ - - - - - Release - Win32 - - - Debug - Win32 - - - - - NotUsing - - - NotUsing - - - NotUsing - - - - NotUsing - - - NotUsing - - - NotUsing - - - NotUsing - - - NotUsing - - - 
NotUsing - - - - - - - - - - - - - - - - - Create - Create - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - {B212DDA4-2A8E-45B2-914D-7BEEB31D06B1} - true - Win32Proj - CLEO4 - 10.0 - - - - DynamicLibrary - false - MultiByte - v143 - true - - - DynamicLibrary - true - MultiByte - v143 - - - - - - - - - - - - - $(SolutionDir).output\$(Configuration)\ - $(SolutionDir).output\.obj\$(Configuration)\ - CLEO - .asi - $(PLUGIN_SDK_DIR)\shared\;$(PLUGIN_SDK_DIR)\shared\game\;$(IncludePath) - - - $(SolutionDir).output\$(Configuration)\ - $(SolutionDir).output\.obj\$(Configuration)\ - CLEO - .asi - $(PLUGIN_SDK_DIR)\shared\;$(PLUGIN_SDK_DIR)\shared\game\;$(IncludePath) - - - - Level3 - MaxSpeed - true - true - true - MultiThreaded - $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared\;$(PLUGIN_SDK_DIR)\shared\game\;$(SolutionDir)\third-party\bass;%(AdditionalIncludeDirectories) - _NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";CLEO4_EXPORTS;%(PreprocessorDefinitions) - /Zc:threadSafeInit- %(AdditionalOptions) - Create - stdcpp17 - - - true - true - true - UseLinkTimeCodeGeneration - $(SolutionDir)\third-party\bass;%(AdditionalLibraryDirectories) - bass.lib;%(AdditionalDependencies) - Windows - $(SolutionDir)source\cleo.def - false - - - xcopy /Y "$(OutDir)$(TargetName).lib" "$(SolutionDir)cleo_sdk\" -taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" -xcopy /Y "$(OutDir)$(TargetName).asi" "$(GTA_SA_DIR)\" -xcopy /Y "$(OutDir)$(TargetName).pdb" "$(GTA_SA_DIR)\" - - - - - Level3 - Disabled - true - MultiThreadedDebug - $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(SolutionDir)\third-party\bass;%(AdditionalIncludeDirectories) - 
_DEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;_SCL_SECURE_NO_WARNINGS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";CLEO4_EXPORTS;%(PreprocessorDefinitions); - /Zc:threadSafeInit- %(AdditionalOptions) - Create - stdcpp17 - - - true - Default - $(SolutionDir)\third-party\bass;%(AdditionalLibraryDirectories) - bass.lib;%(AdditionalDependencies) - Windows - $(SolutionDir)source\cleo.def - false - - - xcopy /Y "$(OutDir)$(TargetName).lib" "$(SolutionDir)cleo_sdk\" -taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" -xcopy /Y "$(OutDir)$(TargetName).asi" "$(GTA_SA_DIR)\" -xcopy /Y "$(OutDir)$(TargetName).pdb" "$(GTA_SA_DIR)\" - - - - - + + + + + Release + Win32 + + + Debug + Win32 + + + + + NotUsing + + + NotUsing + + + NotUsing + + + + NotUsing + + + NotUsing + + + NotUsing + + + NotUsing + + + NotUsing + + + NotUsing + + + + + + + + + + + + + + + + + Create + Create + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {B212DDA4-2A8E-45B2-914D-7BEEB31D06B1} + true + Win32Proj + CLEO5 + 10.0 + CLEO5 + + + + DynamicLibrary + false + MultiByte + v143 + true + + + DynamicLibrary + true + MultiByte + v143 + + + + + + + + + + + + + $(SolutionDir).output\$(Configuration)\ + $(SolutionDir).output\.obj\$(Configuration)\ + CLEO + .asi + $(PLUGIN_SDK_DIR)\shared\;$(PLUGIN_SDK_DIR)\shared\game\;$(IncludePath) + + + $(SolutionDir).output\$(Configuration)\ + $(SolutionDir).output\.obj\$(Configuration)\ + CLEO + .asi + $(PLUGIN_SDK_DIR)\shared\;$(PLUGIN_SDK_DIR)\shared\game\;$(IncludePath) + + + $(GTA_SA_DIR)\gta_sa.exe + $(GTA_SA_DIR) + false + WindowsLocalDebugger + + + + Level3 + MaxSpeed + true + true + true + MultiThreaded + $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared\;$(PLUGIN_SDK_DIR)\shared\game\;$(SolutionDir)\third-party\bass;%(AdditionalIncludeDirectories) + 
_NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";CLEO5_EXPORTS;%(PreprocessorDefinitions) + /Zc:threadSafeInit- %(AdditionalOptions) + Create + stdcpp17 + + + true + true + true + UseLinkTimeCodeGeneration + $(SolutionDir)\third-party\bass;%(AdditionalLibraryDirectories) + bass.lib;%(AdditionalDependencies) + Windows + $(SolutionDir)source\cleo.def + false + + + xcopy /Y "$(OutDir)$(TargetName).lib" "$(SolutionDir)cleo_sdk\" +taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" +xcopy /Y "$(OutDir)$(TargetName).asi" "$(GTA_SA_DIR)\" +xcopy /Y "$(OutDir)$(TargetName).pdb" "$(GTA_SA_DIR)\" + + + + + Level3 + Disabled + true + MultiThreadedDebug + $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(SolutionDir)\third-party\bass;%(AdditionalIncludeDirectories) + _DEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;_SCL_SECURE_NO_WARNINGS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";CLEO5_EXPORTS;%(PreprocessorDefinitions); + /Zc:threadSafeInit- %(AdditionalOptions) + Create + stdcpp17 + + + true + Default + $(SolutionDir)\third-party\bass;%(AdditionalLibraryDirectories) + bass.lib;%(AdditionalDependencies) + Windows + $(SolutionDir)source\cleo.def + false + + + xcopy /Y "$(OutDir)$(TargetName).lib" "$(SolutionDir)cleo_sdk\" +taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" +xcopy /Y "$(OutDir)$(TargetName).asi" "$(GTA_SA_DIR)\" +xcopy /Y "$(OutDir)$(TargetName).pdb" "$(GTA_SA_DIR)\" + + + + + \ No newline at end of file diff --git a/CLEO4.vcxproj.filters b/CLEO5.vcxproj.filters similarity index 99% rename from CLEO4.vcxproj.filters rename to CLEO5.vcxproj.filters index af6942cd..8b149990 100644 --- a/CLEO4.vcxproj.filters +++ b/CLEO5.vcxproj.filters @@ -168,7 +168,7 @@ - + source 
diff --git a/README.md b/README.md index daf6fe01..649f002f 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,10 @@ CLEO requires an 'ASI Loader' installed to run which is provided with the releas No additional files are replaced, however the following files and folders are added: - cleo\ (CLEO script directory) -- cleo\FileSystemOperations.cleo (file system plugin) -- cleo\IniFiles.cleo (INI config plugin) -- cleo\IntOperations.cleo (INT operations plugin) +- cleo\cleo_plugins\DebugUtils.cleo (script debug utilities plugin) +- cleo\cleo_plugins\FileSystemOperations.cleo (file system plugin) +- cleo\cleo_plugins\IniFiles.cleo (INI config plugin) +- cleo\cleo_plugins\IntOperations.cleo (INT operations plugin) - cleo\cleo_saves\ (CLEO save directory) - cleo\cleo_text\ (CLEO text directory) - cleo.asi (core library) @@ -35,7 +36,7 @@ CLEO is continually being improved and extended over time. In very rare circumst ## Credits -The author and original developer of the CLEO library is Seemann. Development of CLEO 4 was led by Alien and Deji. Today the CLEO library is an open-source project being maintained at https://github.com/cleolibrary +The author and original developer of the CLEO library is Seemann. Development of CLEO 4 was led by Alien and Deji, later turned into CLEO 5 by Miran. Today the CLEO library is an open-source project being maintained at https://github.com/cleolibrary The author of the ASI Loader is Silent. 
Find out more at: https://gtaforums.com/topic/523982-relopensrc-silents-asi-loader/ diff --git a/cleo_plugins/DebugUtils/DebugUtils.vcxproj b/cleo_plugins/DebugUtils/DebugUtils.vcxproj index bcaf5cd0..37d5243b 100644 --- a/cleo_plugins/DebugUtils/DebugUtils.vcxproj +++ b/cleo_plugins/DebugUtils/DebugUtils.vcxproj @@ -53,6 +53,12 @@ DebugUtils .cleo + + $(GTA_SA_DIR)\gta_sa.exe + $(GTA_SA_DIR) + false + WindowsLocalDebugger + Level3 diff --git a/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj b/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj index 7f1e6569..a309c585 100644 --- a/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj @@ -53,6 +53,12 @@ FileSystemOperations .cleo + + $(GTA_SA_DIR)\gta_sa.exe + $(GTA_SA_DIR) + false + WindowsLocalDebugger + Level3 diff --git a/cleo_plugins/IniFiles/IniFiles.vcxproj b/cleo_plugins/IniFiles/IniFiles.vcxproj index c199d9fe..8c6ce1f7 100644 --- a/cleo_plugins/IniFiles/IniFiles.vcxproj +++ b/cleo_plugins/IniFiles/IniFiles.vcxproj @@ -53,6 +53,12 @@ IniFiles .cleo + + $(GTA_SA_DIR)\gta_sa.exe + $(GTA_SA_DIR) + false + WindowsLocalDebugger + Level3 diff --git a/cleo_plugins/IntOperations/IntOperations.vcxproj b/cleo_plugins/IntOperations/IntOperations.vcxproj index 72198218..7310a7b0 100644 --- a/cleo_plugins/IntOperations/IntOperations.vcxproj +++ b/cleo_plugins/IntOperations/IntOperations.vcxproj @@ -53,6 +53,12 @@ IntOperations .cleo + + $(GTA_SA_DIR)\gta_sa.exe + $(GTA_SA_DIR) + false + WindowsLocalDebugger + Level3 diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index 26481048..311437ea 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -1,13 +1,13 @@ /* - CLEO 4.5 header file - Copyright (c) 2023 Alien, Deji, Junior_Djjr, Miran + CLEO 5.0.0 header file + Copyright (c) 2023 Alien, Deji, Junior_Djjr, Miran, Seemann */ #pragma once #include -#define CLEO_VERSION_MAIN 4 -#define CLEO_VERSION_MAJOR 5 +#define 
CLEO_VERSION_MAIN 5 +#define CLEO_VERSION_MAJOR 0 #define CLEO_VERSION_MINOR 0 #define CLEO_VERSION ((CLEO_VERSION_MAIN << 24)|(CLEO_VERSION_MAJOR << 16)|(CLEO_VERSION_MINOR << 8)) // 0x0v0v0v00 diff --git a/source/CLEO4.rc b/source/CLEO5.rc similarity index 100% rename from source/CLEO4.rc rename to source/CLEO5.rc From fa82b4fbf179af242b59c69aacf0259176185898 Mon Sep 17 00:00:00 2001 From: Miran Date: Mon, 23 Oct 2023 03:57:26 +0200 Subject: [PATCH 037/216] Changelog updated. --- CHANGELOG.md | 74 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 29 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cdf3c5d8..2f0d2927 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,32 +1,45 @@ -## 4.5.0 - -- introduced CLEO modules feature -- introduced DebugUtils plugin -- new opcode 00C3 (debug_on) -- new opcode 00C4 (debug_off) -- new opcode 00CC (breakpoint) -- new opcode 00CD (trace) -- new opcode 00CE (log_to_file) -- new opcode 0DD5 (get_game_platform) -- new opcode 2000 (resolve_filepath) -- new opcode 2001 (get_script_name) -- implemented support of opcodes 0662, 0663 and 0664 (original Rockstar's script debugging opcodes. See DebugUtils.ini) -- opcodes 0AAB, 0AE4, 0AE5, 0AE1, 0AE2 and 0AE3 moved from CLEO to File plugin. Adding "{$USE FILE}" might be required to compile some scripts -- introduced 'virtual absolute paths'. 
Use prefix in file path strings to access predefined locations: "0:\" game root, "1:\" game save files directory, "2:\" this script file directory, "3:\" cleo folder, "4:\" cleo\cleo_modules -- added more detailed error messages in some scenarios -- on some errors instead of crashing the game just invalid script is paused -- 0AB1 (cleo_call) and 0AB2 (cleo_return) scope now saves and restores GOSUB's call stack -- when reading less than 4 bytes with 0A9D (readfile) now remaining bytes of the target variable are set to zero -- fixed error in 004E (terminate_this_script) allowing to run multiple missions -- 'argument count' parameter of 0AB1 (cleo_call) is now optional. 'cleo_call @LABEL args 0' can be written as 'cleo_call @LABEL' -- 'argument count' parameter of 0AB2 (cleo_return) is now optional. 'cleo_return 0' can be written as 'cleo_return' +## 5.0.0 + +- support for CLEO modules feature https://github.com/sannybuilder/dev/issues/264 +- new [DebugUtils](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/DebugUtils) plugin + - new opcode **00C3 ([debug_on](https://library.sannybuilder.com/#/sa/CLEO/00C3))** + - new opcode **00C4 ([debug_off](https://library.sannybuilder.com/#/sa/CLEO/00C4))** + - new opcode **00CC ([breakpoint](https://library.sannybuilder.com/#/sa/CLEO/00CC))** + - new opcode **00CD ([trace](https://library.sannybuilder.com/#/sa/CLEO/00CD))** + - new opcode **00CE ([log_to_file](https://library.sannybuilder.com/#/sa/CLEO/00CE))** + - implemented support of opcodes **0662**, **0663** and **0664** (original Rockstar's script debugging opcodes. See DebugUtils.ini) +- new and updated opcodes + - **0DD5 ([get_game_platform](https://library.sannybuilder.com/#/sa/CLEO/0DD5))** + - **2000 ([resolve_filepath](https://library.sannybuilder.com/#/sa/CLEO/2000))** + - **2001 ([get_script_name](https://library.sannybuilder.com/#/sa/CLEO/2001))** + - 'argument count' parameter of **0AB1 (cleo_call)** is now optional. 
`cleo_call @LABEL args 0` can be written as `cleo_call @LABEL` + - 'argument count' parameter of **0AB2 (cleo_return)** is now optional. `cleo_return 0` can be written as `cleo_return` + - opcodes **0AAB**, **0AE4**, **0AE5**, **0AE1**, **0AE2** and **0AE3** moved to the [FileSystemOperations](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/FileSystemOperations) plugin +- changes in file operations + - file paths can now use 'virtual absolute paths'. Use prefix in file path strings to access predefined locations: + - `0:\` for _game root_ directory + - `1:\` for _game save files_ directory + - `2:\` for _this script file_ directory + - `3:\` for _CLEO_ directory + - `4:\` for _CLEO\cleo_modules_ directory + - rewritten opcode **0A99 (set_current_directory)**. It no longer affects internal game state and other scripts +- improved error handling + - more detailed error messages in some scenarios + - some errors now cause the script to pause, instead of crashing the game +- SCM functions **(0AB1)** now keep their own GOSUB's call stack + + +### Bug Fixes +- fixed error in **004E (terminate_this_script)** allowing to run multiple missions - fixed handling of strings longer than 128 characters causing errors in some cases -- fixed error in handling of first string argument in 0AF5 (write_string to_ini_file) +- fixed error in handling of first string argument in **0AF5 (write_string to_ini_file)** - fixed resolution dependent aspect ratio of CLEO text in main menu - fixed clearing mission locals when new CLEO mission is started +- when reading less than 4 bytes with **0A9D (readfile)** now remaining bytes of the target variable are set to zero + #### SDK AND PLUGINS -- now all opcodes in range 0-7FFF can be registered by plugins -- plugins moved to cleo\cleo_plugins directory +- now all opcodes in range **0-7FFF** can be registered by plugins +- plugins moved to _cleo\cleo_plugins_ directory - new SDK method: CLEO_RegisterCallback - new SDK method: 
CLEO_GetVarArgCount - new SDK method: CLEO_SkipUnusedVarArgs @@ -37,11 +50,14 @@ - new SDK method: CLEO_GetScriptDebugMode - new SDK method: CLEO_SetScriptDebugMode - new SDK method: CLEO_Log + #### CLEO internal -- updated project settings -- updated general methods for getting and setting string parameters -- rewritten Current Working Directory (editable with 0A99) handling. CWD changes no longer affects internal game's processes and are not globally shared among all scripts -- updated opcodes handling +- project migrated to VS 2022 +- configured game debugging settings +- plugins moved into single solution +- updated pack_release.bat script +- added setup_env.bat script + ## 4.4.4 From ec679b842221b3e822d231f52f1037dcc8e10cd9 Mon Sep 17 00:00:00 2001 From: Miran Date: Mon, 23 Oct 2023 06:39:46 +0200 Subject: [PATCH 038/216] fix ResolvePath returning path separator on end of workdir --- source/CScriptEngine.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index b02e2c4b..e7fde9be 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -669,8 +669,13 @@ namespace CLEO if (strlen(path) < 2 || path[1] != ':') // does not start with drive letter { result = (customWorkDir != nullptr) ? customWorkDir : GetWorkDir(); - if (!result.empty() && result.back() != '\\') result.push_back('\\'); - result += path; + if (!result.empty() && result.back() == '\\') result.pop_back(); + + if (strlen(path) > 0) + { + result.push_back('\\'); + result.append(path); + } } else { From c429b6d3d479dc6b744ec5fd2d6e9ee4d16b1169 Mon Sep 17 00:00:00 2001 From: Miran Date: Mon, 23 Oct 2023 06:41:49 +0200 Subject: [PATCH 039/216] fixup! 
fix ResolvePath returning path separator on end of workdir --- source/CScriptEngine.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index e7fde9be..f2ec60eb 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -673,7 +673,7 @@ namespace CLEO if (strlen(path) > 0) { - result.push_back('\\'); + if(!result.empty()) result.push_back('\\'); result.append(path); } } From 940c9568e3b055bc1a03927bad28107d26303e6c Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Tue, 24 Oct 2023 15:36:48 +0200 Subject: [PATCH 040/216] Fix read formatted string (#2) * Fix problem occurring when 'format' parameter provided to ReadFormattedString is internal buffer of ReadString * Arguments name updates in CLEO.h * Unified way params collection is implemented. Used buffer provided by ReadStringParam instead of creating new. * Disabled whole buffer clearing in CLEO_ReadParamsFormatted as real size might not always be known. * fixup! Disabled whole buffer clearing in CLEO_ReadParamsFormatted as real size might not always be known. * No reason to limit length, as user allocated memory block can be any size. * 0AD3 now respects size limitations of output variable type. --- cleo_sdk/CLEO.h | 6 +- source/CCustomOpcodeSystem.cpp | 154 +++++++++++++++++++-------------- source/CCustomOpcodeSystem.h | 1 + 3 files changed, 91 insertions(+), 70 deletions(-) diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index 311437ea..7d4fc557 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -312,9 +312,9 @@ SCRIPT_VAR* WINAPI CLEO_GetPointerToScriptVariable(CRunningScript* thread); // g void WINAPI CLEO_RetrieveOpcodeParams(CRunningScript* thread, int count); // read multiple params. 
Stored in opcodeParams array DWORD WINAPI CLEO_GetIntOpcodeParam(CRunningScript* thread); float WINAPI CLEO_GetFloatOpcodeParam(CRunningScript* thread); -LPSTR WINAPI CLEO_ReadStringOpcodeParam(CRunningScript* thread, char* buf = nullptr, int size = 0); -LPSTR WINAPI CLEO_ReadStringPointerOpcodeParam(CRunningScript* thread, char* buf = nullptr, int size = 0); // exactly same as CLEO_ReadStringOpcodeParam -char* WINAPI CLEO_ReadParamsFormatted(CRunningScript* thread, const char* format, char* buf = nullptr, int size = 0); // consumes all var-arg params and terminator +LPSTR WINAPI CLEO_ReadStringOpcodeParam(CRunningScript* thread, char* buf = nullptr, int bufSize = 0); +LPSTR WINAPI CLEO_ReadStringPointerOpcodeParam(CRunningScript* thread, char* buf = nullptr, int bufSize = 0); // exactly same as CLEO_ReadStringOpcodeParam +char* WINAPI CLEO_ReadParamsFormatted(CRunningScript* thread, const char* format, char* buf = nullptr, int bufSize = 0); // consumes all var-arg params and terminator // param skip without reading void WINAPI CLEO_SkipOpcodeParams(CRunningScript* thread, int count); diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 6a72ef10..72892994 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -634,7 +634,15 @@ namespace CLEO { GetScriptParams(thread, 1); char* str = opcodeParams[0].pcParam; - size_t length = strlen(str); + size_t length; + if(str != nullptr) + length = strlen(str); + else + { + length = 0; + LOG_WARNING("Reading string from null pointer in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); + } + if(bufSize > 0) length = min(length, bufSize - 1); // minus terminator char else @@ -700,10 +708,27 @@ namespace CLEO { // write output\result string parameter bool WriteStringParam(CRunningScript* thread, const char* str) { - size_t len = str == nullptr ? 
0 : strlen(str); - len = min(len, MAX_STR_LEN - 1); // and terminator char - + auto target = GetStringParamWriteBuffer(thread); + + if(target.first != nullptr && target.second > 0) + { + size_t length = str == nullptr ? 0 : strlen(str); + length = min(length, target.second - 1); // and null terminator + + if (length > 0) std::memcpy(target.first, str, length); + target.first[length] = '\0'; + + return true; // ok + } + + return false; // failed + } + + std::pair GetStringParamWriteBuffer(CRunningScript* thread) + { char* targetBuff; + DWORD targetSize; + auto paramType = CLEO_GetOperandType(thread); switch(paramType) { @@ -714,38 +739,29 @@ namespace CLEO { case DT_VAR_ARRAY: case DT_LVAR_ARRAY: GetScriptParams(thread, 1); - targetBuff = opcodeParams[0].pcParam; - break; + return { opcodeParams[0].pcParam, 0x7FFFFFFF }; // user allocated memory block can be any size // short string variable case DT_VAR_TEXTLABEL: case DT_LVAR_TEXTLABEL: case DT_VAR_TEXTLABEL_ARRAY: case DT_LVAR_TEXTLABEL_ARRAY: - targetBuff = (char*)GetScriptParamPointer(thread); - len = min(len, 7); // 8 with terminator - break; + return { (char*)GetScriptParamPointer(thread), 8 }; // long string variable case DT_VAR_STRING: case DT_LVAR_STRING: case DT_VAR_STRING_ARRAY: case DT_LVAR_STRING_ARRAY: - targetBuff = (char*)GetScriptParamPointer(thread); - len = min(len, 15); // 16 with terminator - break; + return { (char*)GetScriptParamPointer(thread), 16 }; default: { - CLEO_SkipOpcodeParams(thread, 1); // skip unhandled param SHOW_ERROR("Outputing string into invalid argument type (%02X) in script %s", paramType, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return false; + CLEO_SkipOpcodeParams(thread, 1); // skip unhandled param + return { nullptr, 0 }; } } - - if(len > 0) std::memcpy(targetBuff, str, len); - targetBuff[len] = '\0'; - return true; // ok } // perform 'sprintf'-operation for parameters, passed through SCM @@ -840,7 +856,7 @@ namespace CLEO { { static const char none[] = 
"(null)"; if (CLEO_GetOperandType(thread) == DT_END) goto _ReadFormattedString_ArgMissing; - const char *astr = ReadStringParam(thread); + const char *astr = ReadStringParam(thread, bufa, sizeof(bufa)); const char *striter = astr ? astr : none; while (*striter) { @@ -2408,10 +2424,10 @@ namespace CLEO { //0ACB=3,show_styled_text %1d% time %2d% style %3d% OpcodeResult __stdcall opcode_0ACB(CRunningScript *thread) { - const char *text = ReadStringParam(thread); - DWORD time, - style; - *thread >> time >> style; + auto text = ReadStringParam(thread); + DWORD time; *thread >> time; + DWORD style; *thread >> style; + PrintBig(text, time, style); return OR_CONTINUE; } @@ -2419,9 +2435,9 @@ namespace CLEO { //0ACC=2,show_text_lowpriority %1d% time %2d% OpcodeResult __stdcall opcode_0ACC(CRunningScript *thread) { - const char *text = ReadStringParam(thread); - DWORD time; - *thread >> time; + auto text = ReadStringParam(thread); + DWORD time; *thread >> time; + Print(text, time); return OR_CONTINUE; } @@ -2429,9 +2445,9 @@ namespace CLEO { //0ACD=2,show_text_highpriority %1d% time %2d% OpcodeResult __stdcall opcode_0ACD(CRunningScript *thread) { - const char *text = ReadStringParam(thread); - DWORD time; - *thread >> time; + auto text = ReadStringParam(thread); + DWORD time; *thread >> time; + PrintNow(text, time); return OR_CONTINUE; } @@ -2439,10 +2455,9 @@ namespace CLEO { //0ACE=-1,show_formatted_text_box %1d% OpcodeResult __stdcall opcode_0ACE(CRunningScript *thread) { - char fmt[MAX_STR_LEN]; - char text[MAX_STR_LEN]; - ReadStringParam(thread, fmt, sizeof(fmt)); - ReadFormattedString(thread, text, sizeof(text), fmt); + auto format = ReadStringParam(thread); + char text[MAX_STR_LEN]; ReadFormattedString(thread, text, sizeof(text), format); + PrintHelp(text); return OR_CONTINUE; } @@ -2450,11 +2465,11 @@ namespace CLEO { //0ACF=-1,show_formatted_styled_text %1d% time %2d% style %3d% OpcodeResult __stdcall opcode_0ACF(CRunningScript *thread) { - char fmt[MAX_STR_LEN]; 
char text[MAX_STR_LEN]; - DWORD time, style; - ReadStringParam(thread, fmt, sizeof(fmt)); - *thread >> time >> style; - ReadFormattedString(thread, text, sizeof(text), fmt); + auto format = ReadStringParam(thread); + DWORD time; *thread >> time; + DWORD style; *thread >> style; + char text[MAX_STR_LEN]; ReadFormattedString(thread, text, sizeof(text), format); + PrintBig(text, time, style); return OR_CONTINUE; } @@ -2462,11 +2477,10 @@ namespace CLEO { //0AD0=-1,show_formatted_text_lowpriority %1d% time %2d% OpcodeResult __stdcall opcode_0AD0(CRunningScript *thread) { - char fmt[MAX_STR_LEN]; char text[MAX_STR_LEN]; - DWORD time; - ReadStringParam(thread, fmt, sizeof(fmt)); - *thread >> time; - ReadFormattedString(thread, text, sizeof(text), fmt); + auto format = ReadStringParam(thread); + DWORD time; *thread >> time; + char text[MAX_STR_LEN]; ReadFormattedString(thread, text, sizeof(text), format); + Print(text, time); return OR_CONTINUE; } @@ -2474,11 +2488,10 @@ namespace CLEO { //0AD1=-1,show_formatted_text_highpriority %1d% time %2d% OpcodeResult __stdcall opcode_0AD1(CRunningScript *thread) { - char fmt[MAX_STR_LEN]; char text[MAX_STR_LEN]; - DWORD time; - ReadStringParam(thread, fmt, sizeof(fmt)); - *thread >> time; - ReadFormattedString(thread, text, sizeof(text), fmt); + auto format = ReadStringParam(thread); + DWORD time; *thread >> time; + char text[MAX_STR_LEN]; ReadFormattedString(thread, text, sizeof(text), format); + PrintNow(text, time); return OR_CONTINUE; } @@ -2507,13 +2520,20 @@ namespace CLEO { //0AD3=-1,string %1d% format %2d% ... 
OpcodeResult __stdcall opcode_0AD3(CRunningScript *thread) { - char fmt[MAX_STR_LEN], *dst; + auto resultArg = GetStringParamWriteBuffer(thread); - if (*thread->GetBytePointer() >= 1 && *thread->GetBytePointer() <= 8) *thread >> dst; - else dst = &GetScriptParamPointer(thread)->cParam; + auto format = ReadStringParam(thread); + char text[MAX_STR_LEN]; ReadFormattedString(thread, text, sizeof(text), format); + + if (resultArg.first != nullptr && resultArg.second > 0) + { + size_t length = text == nullptr ? 0 : strlen(text); + length = min(length, resultArg.second - 1); // and null terminator + + if (length > 0) std::memcpy(resultArg.first, text, length); + resultArg.first[length] = '\0'; + } - ReadStringParam(thread, fmt, sizeof(fmt)); - ReadFormattedString(thread, dst, MAX_STR_LEN, fmt); // TODO: get actual length limit based on target type return OR_CONTINUE; } @@ -2609,11 +2629,10 @@ namespace CLEO { //0AD9=-1,write_formated_text %2d% to_file %1d% OpcodeResult __stdcall opcode_0AD9(CRunningScript *thread) { - char fmt[MAX_STR_LEN]; char text[MAX_STR_LEN]; - DWORD hFile; - *thread >> hFile; - ReadStringParam(thread, fmt, sizeof(fmt)); - ReadFormattedString(thread, text, sizeof(text), fmt); + DWORD hFile; *thread >> hFile; + auto format = ReadStringParam(thread); + char text[MAX_STR_LEN]; ReadFormattedString(thread, text, sizeof(text), format); + if (FILE * file = convert_handle_to_file(hFile)) { fputs(text, file); @@ -2625,9 +2644,8 @@ namespace CLEO { //0ADA=-1,%3d% = scan_file %1d% format %2d% //IF and SET OpcodeResult __stdcall opcode_0ADA(CRunningScript *thread) { - DWORD hFile; - *thread >> hFile; - char *fmt = ReadStringParam(thread); + DWORD hFile; *thread >> hFile; + auto format = ReadStringParam(thread); int *result = (int *)GetScriptParamPointer(thread); @@ -2639,7 +2657,7 @@ namespace CLEO { if (FILE *file = convert_handle_to_file(hFile)) { - *result = fscanf(file, fmt, + *result = fscanf(file, format, /* extra parameters (will be aligned automatically, 
but the limit of 35 elements maximum exists) */ ExParams[0], ExParams[1], ExParams[2], ExParams[3], ExParams[4], ExParams[5], ExParams[6], ExParams[7], ExParams[8], ExParams[9], ExParams[10], ExParams[11], @@ -3034,17 +3052,19 @@ extern "C" WriteStringParam(thread, str); } - char* WINAPI CLEO_ReadParamsFormatted(CLEO::CRunningScript* thread, const char* format, char* buf, int size) + char* WINAPI CLEO_ReadParamsFormatted(CLEO::CRunningScript* thread, const char* format, char* buf, int bufSize) { static char internal_buf[MAX_STR_LEN * 4]; - if (!buf) { buf = internal_buf; size = sizeof(internal_buf); } - if (!size) size = MAX_STR_LEN; - std::fill(buf, buf + size, '\0'); + if (!buf) { buf = internal_buf; bufSize = sizeof(internal_buf); } + if (!bufSize) bufSize = MAX_STR_LEN; if(format != nullptr && strlen(format) > 0) - ReadFormattedString(thread, buf, size, format); + ReadFormattedString(thread, buf, bufSize, format); else + { SkipUnusedVarArgs(thread); + if(bufSize > 0) buf[0] = '\0'; + } return buf; } diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index 24a8c691..6a0a023f 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -70,6 +70,7 @@ namespace CLEO char* ReadStringParam(CRunningScript* thread, char* buf = nullptr, DWORD bufSize = 0); bool WriteStringParam(CRunningScript* thread, const char* str); + std::pair GetStringParamWriteBuffer(CRunningScript* thread); // consumes the param int ReadFormattedString(CRunningScript* thread, char* buf, DWORD bufSize, const char* format); void SkipUnusedVarArgs(CRunningScript* thread); // for var-args opcodes DWORD GetVarArgCount(CRunningScript* thread); // for var-args opcodes From 600e7bd930d91f10213be7b2c9f9733175bdd1af Mon Sep 17 00:00:00 2001 From: Miran Date: Wed, 25 Oct 2023 01:13:00 +0200 Subject: [PATCH 041/216] Added loading plugins from legacy location --- source/CModuleSystem.cpp | 4 +-- source/CPluginSystem.h | 63 ++++++++++++++++++++++++++++++++++------ 
source/CScriptEngine.cpp | 12 ++++---- source/CTextManager.cpp | 8 ++--- source/FileEnumerator.h | 3 +- 5 files changed, 68 insertions(+), 22 deletions(-) diff --git a/source/CModuleSystem.cpp b/source/CModuleSystem.cpp index c66ddbbe..98e0054d 100644 --- a/source/CModuleSystem.cpp +++ b/source/CModuleSystem.cpp @@ -60,9 +60,9 @@ bool CModuleSystem::LoadFile(const char* path) bool CModuleSystem::LoadDirectory(const char* path) { bool result = true; - FilesWalk(path, ".s", [&](const char* filename) + FilesWalk(path, ".s", [&](const char* fullPath, const char* filename) { - result &= LoadFile(filename); + result &= LoadFile(fullPath); }); return result; diff --git a/source/CPluginSystem.h b/source/CPluginSystem.h index 3940a836..11ffed3b 100644 --- a/source/CPluginSystem.h +++ b/source/CPluginSystem.h @@ -1,9 +1,11 @@ #pragma once -#include -#include -#include #include "FileEnumerator.h" #include "CDebug.h" +#include +#include +#include +#include + namespace CLEO { @@ -14,16 +16,59 @@ namespace CLEO public: CPluginSystem() { + std::set loaded; + TRACE("Loading plugins..."); - FilesWalk("cleo\\cleo_plugins", ".cleo", [this](const char *filename) + + FilesWalk("cleo\\cleo_plugins", ".cleo", [&](const char* fullPath, const char* filename) + { + std::string name = filename; + std::transform(name.begin(), name.end(), name.begin(), [](unsigned char c) { return std::tolower(c); }); + + if(loaded.find(name) == loaded.end()) + { + TRACE("Loading plugin '%s'", fullPath); + HMODULE hlib = LoadLibrary(fullPath); + if (!hlib) + { + LOG_WARNING("Error loading plugin '%s'", fullPath); + } + else + { + loaded.insert(name); + plugins.push_back(hlib); + } + } + else + { + LOG_WARNING("Plugin `%s` already loaded. 
Skipping '%s'", name.c_str(), fullPath); + } + }); + + // load plugins from legacy location + FilesWalk("cleo", ".cleo", [&](const char* fullPath, const char* filename) { - TRACE("Loading plugin '%s'", filename); - HMODULE hlib = LoadLibrary(filename); - if (!hlib) + std::string name = filename; + std::transform(name.begin(), name.end(), name.begin(), [](unsigned char c) { return std::tolower(c); }); + + if(loaded.find(name) == loaded.end()) + { + TRACE("Loading plugin '%s'", fullPath); + HMODULE hlib = LoadLibrary(fullPath); + if (!hlib) + { + LOG_WARNING("Error loading plugin '%s'", fullPath); + } + else + { + loaded.insert(name); + plugins.push_back(hlib); + } + } + else { - LOG_WARNING("Error loading plugin '%s'", filename); + LOG_WARNING("Plugin `%s` already loaded. Skipping '%s'", name.c_str(), fullPath); } - else plugins.push_back(hlib); }); } diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index f2ec60eb..7268ac25 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -989,18 +989,18 @@ namespace CLEO TRACE("Searching for cleo scripts"); - FilesWalk(scriptsDir.c_str(), cs_ext, [this](const char *filename) { - auto cs = LoadScript(filename); + FilesWalk(scriptsDir.c_str(), cs_ext, [this](const char* fullPath, const char* filename) { + auto cs = LoadScript(fullPath); cs->SetDebugMode(NativeScriptsDebugMode); // inherit from global state }); - FilesWalk(scriptsDir.c_str(), cs4_ext, [this](const char *filename) { - auto cs = LoadScript(filename); + FilesWalk(scriptsDir.c_str(), cs4_ext, [this](const char* fullPath, const char* filename) { + auto cs = LoadScript(fullPath); if (cs) cs->SetCompatibility(CLEO_VER_4); }); - FilesWalk(scriptsDir.c_str(), cs3_ext, [this](const char *filename) { - auto cs = LoadScript(filename); + FilesWalk(scriptsDir.c_str(), cs3_ext, [this](const char* fullPath, const char* filename) { + auto cs = LoadScript(fullPath); if (cs) cs->SetCompatibility(CLEO_VER_3); }); diff --git a/source/CTextManager.cpp 
b/source/CTextManager.cpp index c4ed702e..b0fd9002 100644 --- a/source/CTextManager.cpp +++ b/source/CTextManager.cpp @@ -104,17 +104,17 @@ namespace CLEO CTextManager::CTextManager() : fxts(1, crc32FromUpcaseStdString) { // parse FXT files - FilesWalk("cleo\\cleo_text", ".fxt", [this](const char *fname) + FilesWalk("cleo\\cleo_text", ".fxt", [this](const char* fullPath, const char* filename) { - TRACE("Parsing FXT file %s", fname); + TRACE("Parsing FXT file %s", fullPath); try { - std::ifstream stream(fname); + std::ifstream stream(fullPath); ParseFxtFile(stream); } catch (std::exception& ex) { - LOG_WARNING("Loading of FXT file '%s' failed: \n%s", fname, ex.what()); + LOG_WARNING("Loading of FXT file '%s' failed: \n%s", fullPath, ex.what()); } }); } diff --git a/source/FileEnumerator.h b/source/FileEnumerator.h index 0592a585..de43c605 100644 --- a/source/FileEnumerator.h +++ b/source/FileEnumerator.h @@ -20,7 +20,8 @@ void FilesWalk(const char* directory, const char* extension, T callback) } } - callback(std::filesystem::absolute(filePath).string().c_str()); + auto result = std::filesystem::absolute(filePath); + callback(result.string().c_str(), result.filename().string().c_str()); } } } From af9bf5c6e1f2800ef5d65e738e435946c3904a8b Mon Sep 17 00:00:00 2001 From: Seemann Date: Thu, 26 Oct 2023 09:48:06 -0400 Subject: [PATCH 042/216] setup github workflow for automatic releases (#6) --- .github/workflows/main.yml | 89 ++++++++++++++++++ .github/workflows/markdown.js | 16 ++++ .github/workflows/version.js | 47 +++++++++ .gitignore | 4 +- .gitmodules | 3 + CLEO5.vcxproj | 12 ++- cleo_plugins/DebugUtils/DebugUtils.vcxproj | 4 + cleo_plugins/IniFiles/IniFiles.vcxproj | 8 +- .../IntOperations/IntOperations.vcxproj | 8 +- cleo_sdk/CLEO.h | 2 +- cleo_sdk/CLEO.lib | Bin 10440 -> 0 bytes pack_release.bat | 35 ------- third-party/plugin-sdk | 1 + 13 files changed, 183 insertions(+), 46 deletions(-) create mode 100644 .github/workflows/main.yml create mode 100644 
.github/workflows/markdown.js create mode 100644 .github/workflows/version.js create mode 100644 .gitmodules delete mode 100644 cleo_sdk/CLEO.lib delete mode 100644 pack_release.bat create mode 160000 third-party/plugin-sdk diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 00000000..2e6b9719 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,89 @@ +name: CLEO 5 Release Build + +on: + push: + tags: + - 'v[0-9]+.[0-9]+.[0-9]+**' + +jobs: + build: + runs-on: windows-2022 + permissions: + contents: write + + steps: + - uses: actions/checkout@v2 + + - name: Add msbuild to PATH + uses: microsoft/setup-msbuild@v1.1 + + - uses: actions/checkout@v2 + with: + submodules: "true" + + - uses: actions/setup-node@v3 + with: + node-version: lts/* + + - name: Read Version Tag + id: read_version + run: node.exe .github/workflows/version.js + + - name: Build Projects + shell: cmd + run: | + set PLUGIN_SDK_DIR=%GITHUB_WORKSPACE%\third-party\plugin-sdk + msbuild -m CLEO5.sln /property:Configuration=Release /property:Platform=GTASA + msbuild -m cleo_plugins/CLEO_Plugins.sln /property:Configuration=Release /property:Platform=x86 + + - name: Prepare Files + id: prepare_archive + shell: cmd + run: | + @REM create output directory + mkdir .output\Release\cleo + mkdir .output\Release\cleo\cleo_modules + mkdir .output\Release\cleo\cleo_plugins + mkdir .output\Release\cleo\cleo_saves + mkdir .output\Release\cleo\cleo_text + mkdir .output\Release\cleo_readme + + @REM copy files + copy third-party\bass\bass.dll .output\Release\bass.dll + copy source\cleo_config.ini .output\Release\cleo\.cleo_config.ini + copy cleo_plugins\.output\*.cleo .output\Release\cleo\cleo_plugins + copy cleo_plugins\.output\*.ini .output\Release\cleo\cleo_plugins + + @REM install Silent's ASI Loader + curl https://silent.rockstarvision.com/uploads/silents_asi_loader_13.zip -o silents_asi_loader_13.zip + powershell.exe -NoP -NonI -Command "Expand-Archive 
'.\silents_asi_loader_13.zip' '.\.output\Release'" + move .output\Release\ReadMe.txt ".output\Release\cleo_readme\ASI Loader Readme.txt" + rmdir /s /q .output\Release\advanced_plugin_management_example + rmdir /s /q .output\Release\scripts + + + - name: Convert Markdown to HTML + id: md_to_html + run: | + npm install showdown + node.exe .github/workflows/markdown.js + move README.html .output\Release\cleo_readme\README.html + move CHANGELOG.html .output\Release\cleo_readme\CHANGELOG.html + + - name: Pack binaries (Main) + uses: ThirteenAG/zip-release@master + with: + path: ./.output/Release/* + type: "zip" + filename: ${{ steps.read_version.outputs.archive_name }} + exclusions: "*.pdb *.lib *.exp *.map" + + - name: Upload Release + uses: ncipollo/release-action@v1.10.0 + with: + token: ${{ secrets.GITHUB_TOKEN }} + name: ${{ steps.read_version.outputs.version }} + bodyFile: 'changes.txt' # generated in read_version + tag: ${{ github.ref_name }} + prerelease: ${{ contains(github.ref_name, 'beta') || contains(github.ref_name, 'alpha') }} + artifacts: ${{ steps.read_version.outputs.archive_name }} diff --git a/.github/workflows/markdown.js b/.github/workflows/markdown.js new file mode 100644 index 00000000..445c159a --- /dev/null +++ b/.github/workflows/markdown.js @@ -0,0 +1,16 @@ +const showdown = require('showdown'); +const {readFileSync, writeFileSync} = require('fs'); + +const md = new showdown.Converter({ + literalMidWordUnderscores: true, + disableForced4SpacesIndentedSublists: true, + noHeaderId: true, + completeHTMLDocument: true, + simplifiedAutoLink: true, +}); + +const readme = md.makeHtml(readFileSync('README.md', 'utf8')); +const changelog = md.makeHtml(readFileSync('CHANGELOG.md', 'utf8')); + +writeFileSync('README.html', readme, 'utf8'); +writeFileSync('CHANGELOG.html', changelog, 'utf8'); diff --git a/.github/workflows/version.js b/.github/workflows/version.js new file mode 100644 index 00000000..67613633 --- /dev/null +++ 
b/.github/workflows/version.js @@ -0,0 +1,47 @@ +const { appendFileSync, readFileSync, writeFileSync } = require("fs"); +const { EOL } = require("os"); +const { GITHUB_OUTPUT, GITHUB_REF_NAME } = process.env; + +if (GITHUB_REF_NAME) { + const version = GITHUB_REF_NAME.startsWith("v") ? GITHUB_REF_NAME.slice(1) : GITHUB_REF_NAME; + addOutput("version", version); + addOutput("archive_name", `SA.CLEO_${GITHUB_REF_NAME}.zip`); + + // update cleo.h to replace version + const cleoH = readFileSync("cleo_sdk/cleo.h", { encoding: "utf-8" }); + + const [, main, major, minor] = version.match(/(\d+)\.(\d+)\.(\d+).*/); + + const newCleoH = cleoH + .replace(/#define\s+CLEO_VERSION_MAIN\s+.*/, `#define CLEO_VERSION_MAIN ${main}`) + .replace(/#define\s+CLEO_VERSION_MAJOR\s+.*/, `#define CLEO_VERSION_MAJOR ${major}`) + .replace(/#define\s+CLEO_VERSION_MINOR\s+.*/, `#define CLEO_VERSION_MINOR ${minor}`) + .replace(/#define\s+CLEO_VERSION_STR\s+.*/, `#define CLEO_VERSION_STR "${version}"`); + writeFileSync("cleo_sdk/cleo.h", newCleoH, { encoding: "utf-8" }); +} + +const changelog = readFileSync("CHANGELOG.md", { encoding: "utf-8" }); +writeFileSync("changes.txt", getChanges().join(EOL), { encoding: "utf-8" }); + +function addOutput(key, value) { + appendFileSync(GITHUB_OUTPUT, `${key}=${value}${EOL}`, { encoding: "utf-8" }); +} + +function getChanges() { + const lines = changelog.split(EOL); + const result = []; + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + if (line.trimStart().startsWith("## ")) { + for (let j = i + 1; j < lines.length; j++) { + const line = lines[j]; + if (line.trimStart().startsWith("## ")) { + return result; + } + result.push(line); + } + } + } + + return result; +} diff --git a/.gitignore b/.gitignore index cee44a0e..47a59810 100644 --- a/.gitignore +++ b/.gitignore @@ -39,6 +39,6 @@ Debug/* Release/* ipch/ .vs/ -*/.output *.zip -/.output/*/*.lib +*.lib +node_modules/ \ No newline at end of file diff --git a/.gitmodules b/.gitmodules new 
file mode 100644 index 00000000..4cab7a0d --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "plugin-sdk"] + path = third-party/plugin-sdk + url = https://github.com/DK22Pac/plugin-sdk diff --git a/CLEO5.vcxproj b/CLEO5.vcxproj index 5191e703..7ede7b03 100644 --- a/CLEO5.vcxproj +++ b/CLEO5.vcxproj @@ -147,7 +147,7 @@ true MultiThreaded $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared\;$(PLUGIN_SDK_DIR)\shared\game\;$(SolutionDir)\third-party\bass;%(AdditionalIncludeDirectories) - _NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";CLEO5_EXPORTS;%(PreprocessorDefinitions) + _NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;%(PreprocessorDefinitions) /Zc:threadSafeInit- %(AdditionalOptions) Create stdcpp17 @@ -165,9 +165,11 @@ xcopy /Y "$(OutDir)$(TargetName).lib" "$(SolutionDir)cleo_sdk\" +if defined GTA_SA_DIR ( taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" xcopy /Y "$(OutDir)$(TargetName).asi" "$(GTA_SA_DIR)\" -xcopy /Y "$(OutDir)$(TargetName).pdb" "$(GTA_SA_DIR)\" +xcopy /Y "$(OutDir)$(TargetName).pdb" "$(GTA_SA_DIR)\" +) @@ -177,7 +179,7 @@ xcopy /Y "$(OutDir)$(TargetName).pdb" "$(GTA_SA_DIR)\" true MultiThreadedDebug $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(SolutionDir)\third-party\bass;%(AdditionalIncludeDirectories) - _DEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;_SCL_SECURE_NO_WARNINGS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";CLEO5_EXPORTS;%(PreprocessorDefinitions); + _DEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;_SCL_SECURE_NO_WARNINGS;GTASA;%(PreprocessorDefinitions); /Zc:threadSafeInit- %(AdditionalOptions) Create 
stdcpp17 @@ -193,9 +195,11 @@ xcopy /Y "$(OutDir)$(TargetName).pdb" "$(GTA_SA_DIR)\" xcopy /Y "$(OutDir)$(TargetName).lib" "$(SolutionDir)cleo_sdk\" +if defined GTA_SA_DIR ( taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" xcopy /Y "$(OutDir)$(TargetName).asi" "$(GTA_SA_DIR)\" -xcopy /Y "$(OutDir)$(TargetName).pdb" "$(GTA_SA_DIR)\" +xcopy /Y "$(OutDir)$(TargetName).pdb" "$(GTA_SA_DIR)\" +) diff --git a/cleo_plugins/DebugUtils/DebugUtils.vcxproj b/cleo_plugins/DebugUtils/DebugUtils.vcxproj index 37d5243b..119798e5 100644 --- a/cleo_plugins/DebugUtils/DebugUtils.vcxproj +++ b/cleo_plugins/DebugUtils/DebugUtils.vcxproj @@ -84,7 +84,9 @@ taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" xcopy /Y "$(ProjectDir)*.ini" "$(OutDir)" +if defined GTA_SA_DIR ( xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" +) @@ -109,7 +111,9 @@ xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" xcopy /Y "$(ProjectDir)*.ini" "$(OutDir)" +if defined GTA_SA_DIR ( xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" +) diff --git a/cleo_plugins/IniFiles/IniFiles.vcxproj b/cleo_plugins/IniFiles/IniFiles.vcxproj index 8c6ce1f7..7a80d178 100644 --- a/cleo_plugins/IniFiles/IniFiles.vcxproj +++ b/cleo_plugins/IniFiles/IniFiles.vcxproj @@ -83,7 +83,9 @@ taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" -xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" +if defined GTA_SA_DIR ( + xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" +) @@ -106,7 +108,9 @@ xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" -xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" +if defined GTA_SA_DIR ( + xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" +) diff --git a/cleo_plugins/IntOperations/IntOperations.vcxproj 
b/cleo_plugins/IntOperations/IntOperations.vcxproj index 7310a7b0..adfe34fa 100644 --- a/cleo_plugins/IntOperations/IntOperations.vcxproj +++ b/cleo_plugins/IntOperations/IntOperations.vcxproj @@ -83,7 +83,9 @@ taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" -xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" +if defined GTA_SA_DIR ( + xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" +) @@ -106,7 +108,9 @@ xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" -xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" +if defined GTA_SA_DIR ( +xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" +) diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index 7d4fc557..a2835139 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -1,5 +1,5 @@ /* - CLEO 5.0.0 header file + CLEO 5 header file Copyright (c) 2023 Alien, Deji, Junior_Djjr, Miran, Seemann */ #pragma once diff --git a/cleo_sdk/CLEO.lib b/cleo_sdk/CLEO.lib deleted file mode 100644 index 70f64d4d1ee757c868cc6b964f167c7650369c9e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 10440 zcmb_ieQX>@6@PX<>wLLR$cN+D=a1MaDvsmcxh9IDbL=KKy~OU>DFRhKZ|*kEsdu+# zZ!bz9qESRdB%rEFQLFyf2o-_&D=Pc}k(x?wRgefM{Dr6}5|JquN>HoPptcI{?dX{`n>TarJfxY~vxl4Z$HeO^2gG;&HsCxA#)qB9*(LyZ5@72R z!1nb3I}QQtI`7b3yd$;FFxveN08#sgjA91?h&l%ub?pQo>NXwv0PjdWC{MKas6#jK zj?{aNQ6I|K^a9?A?lamq3qaI1#3*(h@sRes%Bb}=_6uoOBct8Nv42QAuQKYwJ5lHB zjM^__zmagH(KpQ${GMVuclN0Ib%-9oCqv`R< z>4|h^H1)&;WHKjDrBeP@c#fJ`HFEl-W@^P` z9989u^O;Z%TPI2%(~A1jx>?Q}rR0DtC^d4K@lrURznx1?%r4QZ(7%KQxD_nlwgBcZ27O7c}w@Zh-mb3OvQYeq9tWywL%#((kqKRqZ*FL zBEB(>vZY!1EbA?@NUwHpF*+xWdwW(bhGR%drlO=>^@=>F*qNO-gbhT(*|9UNp|> z5fp_pI=WcG8Py7f(^~dfX=Ky7znPB;y4i?1H)>Q$mSh0Vhtz1xP{5ulRmysfwU#9@ zWxLZ$EoZM<<)emK)GSNSB@YZq8CT81d&`E4F9yEF&(|!&c+5>w4Rr9+!>6 zIb3S3v$C^c9u%5Hq%r+;W$py7($e6#Py&N3i#e1#1D6%!L(O2zri?jh%GsCNYrU_! 
zwO$_<0L-9pGRLV6YP_s4f7r8{h+^nI3?ndjW1B zUBe1#Cn|aY=|0lzK7gS%fa@st>K=gGsH+im9!Gswv9i00dR|9;mr-{=$}V&OG~;tG z@~%eR*HQL1>Yu^yScpO!w8Cmw18ZRy?1pu)9ri*u?1V1pfc4M^o$wIsfF@{%4X_7# zU?XgT&Cm;5pc!KDFtk7eG_HV_t*{Ng1nloBr&?Ps{+G#Cs!EUhSmaWT3xwrWbjjrs z9=>ikIH(KmxS}RpK$!+vk5VV3y&0%Y2 zT3@z^^rme`b3g$Zwq!VxinNs0Yp*J8ZJ2VQXoaYiiJnE4?)TVqAZsZ3qSuF8k8G6+K5G+A(Tu3c|zb-Web79U&|?)S^kBS=ij!ZJb z_E6eniW1`9@i{v~+DA#A(<1uVua)z>`djXVFFrizob}B%r5aYjqrdsq{k08P`C)~H zS3`e}s@a|?_6l$qAAZPRoO=?QkM!qrnx*v&@}_P0@Oz}cIUiQxJqZIh9X>p%V^$y>Vu(f#|ejsIZZN_5nt&1%u@>$a;8)vN8zVsup)c8p% z4}I50{6d1H04qxS2@mwJf~0R80@1TZaUQ$**2RW~2M;(H@zX{9Y6Na=62R{1S^mYg zi%?eaFLC0*58(17s$b|kmbdOj+ObM@-w7*)Z!2|{$gPa(9fR@5J>Jm>FTv;k_%-&2 z^;7K~IZyBD(c>MD@h0+)Yy8EzlXsBbm7Vn{vq>`$cqrRc^O!v@M$|8wuHL~z;m|P) ziKv7#JhsPDM0qbyM8)}c^3+@W#ksHC*hU#o+}H)2W!E~EL+!4<+#AK*Us>RdN&E9z?ZuDDJoQmo4uvJc^ij=ux#bxK6JdJE54@H9tRM z&nsmKcbvu2zivF0t#78=-XmM_FsX_cyql;SJ2B)R=X;mG@o5j@c6qVo+S21x96ajW zs{HtSKilyK^5d8v)3fK{NOk0;VZWWPymsbeJTSvkD9;!(JH>sJ3(FrOTdMHlO)q`E zL?esGan+e+sC<+4MaEX)yEm@v|6>f%@F1{?DDL*SVKv8TTrsX+Zn^LtVNr$X#tOG> zz=d!hqK;-&)S59@8PkdVOi#H~+UEF{#>QV_My^t9RoreC?yH$Uo;hXa7#~ zNjnaXuh`+cp!^}~SSJ)w`NqeW7AY$6P~FA!9U5?;nnzi+qWtvxKl(I=XtZPGR!S$B zb>pd4l!c``pU{dzRVK#^IU21SJE5o;9z1{X9>q?l2U|ETsUJ-dt=qbO@)1QV?Si?b z$^)4CQ58}1@{3pgP4y=2%o(csOveSSJysO;-#_;~vq_ zEYRw<#YGJ~9OQ7WA4kQFE9UguZ;btib`W%iCH-?s{Wy>LtIPJvyLV~F`!L}M2a+yK X%^5&t&tk{>pOHN+gd-kxx Date: Thu, 26 Oct 2023 16:29:21 +0200 Subject: [PATCH 043/216] Error messages updates. 
(#5) --- source/CCustomOpcodeSystem.cpp | 22 +++++++++++++++------- source/CScriptEngine.cpp | 2 +- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 72892994..fb858900 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -1317,7 +1317,7 @@ namespace CLEO { GetInstance().CodeInjector.MemoryRead(Address, (DWORD)opcodeParams[0].dwParam, vp); break; default: - SHOW_ERROR("Invalid size param (%d) of opcode [0A8D])", size); + SHOW_ERROR("Invalid size param (%d) of opcode [0A8D] in script %s", size, ((CCustomScript*)thread)->GetInfoStr().c_str()); } SetScriptParams(thread, 1); @@ -1390,7 +1390,8 @@ namespace CLEO { CCustomScript *cs = reinterpret_cast(thread); if (thread->IsMission() || !cs->IsCustom()) { - LOG_WARNING("[0A93] Incorrect usage of opcode in script '%s'", ((CCustomScript*)thread)->GetInfoStr().c_str()); + LOG_WARNING("Incorrect usage of opcode [0A93] in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); + return OR_CONTINUE; } GetInstance().ScriptEngine.RemoveCustomScript(cs); @@ -2012,7 +2013,8 @@ namespace CLEO { default: { - SHOW_ERROR("Invalid first argument type (%02X) of [0AB1] opcode in script '%s' \nScript suspended.", *thread->GetBytePointer(), ((CCustomScript*)thread)->GetInfoStr().c_str()); + SHOW_ERROR("Invalid type (%02X) of the first argument in opcode [0AB1] in script %s \nScript suspended.", *thread->GetBytePointer(), ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); } } @@ -2026,7 +2028,7 @@ namespace CLEO { auto pos = str.find('@'); if (pos == str.npos) { - SHOW_ERROR("Invalid module reference '%s' in 0AB1 opcode in script '%s' \nScript suspended.", moduleTxt, ((CCustomScript*)thread)->GetInfoStr().c_str()); + SHOW_ERROR("Invalid module reference '%s' in opcode [0AB1] in script %s \nScript suspended.", moduleTxt, ((CCustomScript*)thread)->GetInfoStr().c_str()); 
return CCustomOpcodeSystem::ErrorSuspendScript(thread); } std::string_view strExport = str.substr(0, pos); @@ -2040,7 +2042,7 @@ namespace CLEO { auto scriptRef = GetInstance().ModuleSystem.GetExport(modulePath, strExport); if (!scriptRef.Valid()) { - SHOW_ERROR("Not found module '%s' export '%s', requested by 0AB1 opcode in script '%s'", modulePath.c_str(), &str[0], ((CCustomScript*)thread)->GetInfoStr().c_str()); + SHOW_ERROR("Not found module '%s' export '%s', requested by opcode [0AB1] in script %s", modulePath.c_str(), &str[0], ((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } scmFunc->moduleExportRef = scriptRef.base; // to be released on return @@ -2053,6 +2055,12 @@ namespace CLEO { DWORD nParams = 0; if(*thread->GetBytePointer()) *thread >> nParams; + if(nParams > 32) + { + SHOW_ERROR("Argument count (%d), out of supported range (32) of opcode [0AB1] in script %s", nParams, ((CCustomScript*)thread)->GetInfoStr().c_str()); + + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } static SCRIPT_VAR arguments[32]; SCRIPT_VAR* locals = thread->IsMission() ? 
missionLocals : thread->GetVarPtr(); @@ -2678,7 +2686,7 @@ namespace CLEO { *thread >> mi; CVehicleModelInfo* model; - // if 1.0 US, prefer GetModelInfo function — makes it compatible with fastman92's limit adjuster + // if 1.0 US, prefer GetModelInfo function — makes it compatible with fastman92's limit adjuster if (CLEO::GetInstance().VersionManager.GetGameVersion() == CLEO::GV_US10) { model = plugin::CallAndReturn(mi); } @@ -2705,7 +2713,7 @@ namespace CLEO { *thread >> mi; CVehicleModelInfo* model; - // if 1.0 US, prefer GetModelInfo function — makes it compatible with fastman92's limit adjuster + // if 1.0 US, prefer GetModelInfo function — makes it compatible with fastman92's limit adjuster if (CLEO::GetInstance().VersionManager.GetGameVersion() == CLEO::GV_US10) { model = plugin::CallAndReturn(mi); } diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 7268ac25..640de6fb 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -253,7 +253,7 @@ namespace CLEO { if (!pScript->IsMission()) { - TRACE("[004E] Incorrect usage of opcode in script '%s'.", pScript->GetName()); + TRACE("Incorrect usage of opcode [004E] in script %s.", pScript->GetName()); } else *MissionLoaded = false; GetInstance().ScriptEngine.RemoveCustomScript(pScript); From a912845fbd67a40130d1b49ff55190de7b0bf365 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Thu, 26 Oct 2023 17:46:45 +0200 Subject: [PATCH 044/216] changelog updated (#8) --------- Co-authored-by: Seemann --- CHANGELOG.md | 194 ++------------------------------------------------- 1 file changed, 4 insertions(+), 190 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2f0d2927..cb65fb15 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ - new and updated opcodes - **0DD5 ([get_game_platform](https://library.sannybuilder.com/#/sa/CLEO/0DD5))** - **2000 ([resolve_filepath](https://library.sannybuilder.com/#/sa/CLEO/2000))** - - **2001 
([get_script_name](https://library.sannybuilder.com/#/sa/CLEO/2001))** + - **2001 ([get_script_filename](https://library.sannybuilder.com/#/sa/CLEO/2001))** - 'argument count' parameter of **0AB1 (cleo_call)** is now optional. `cleo_call @LABEL args 0` can be written as `cleo_call @LABEL` - 'argument count' parameter of **0AB2 (cleo_return)** is now optional. `cleo_return 0` can be written as `cleo_return` - opcodes **0AAB**, **0AE4**, **0AE5**, **0AE1**, **0AE2** and **0AE3** moved to the [FileSystemOperations](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/FileSystemOperations) plugin @@ -27,6 +27,7 @@ - more detailed error messages in some scenarios - some errors now cause the script to pause, instead of crashing the game - SCM functions **(0AB1)** now keep their own GOSUB's call stack +- updated included Silent's ASI Loader to version 1.3 ### Bug Fixes @@ -55,194 +56,7 @@ - project migrated to VS 2022 - configured game debugging settings - plugins moved into single solution -- updated pack_release.bat script +- configured automatic releases on GitHub - added setup_env.bat script - -## 4.4.4 - -- added string arguments support to 0AB1 (cleo_call) -- fix an issue when PRINT_STRING and PRINT_BIG_STRING would overwrite each other (see https://github.com/cleolibrary/CLEO4/issues/80) -- update BASS.dll to the latest to solve some issues with audio in game (see https://github.com/cleolibrary/CLEO4/issues/70) -- added support of variable length arguments longer than 128 characters - -## 4.4.3 - -- added correct support of condition result to opcodes 0AB1 0AB2. Fixes possible bugs when 0AB1 are used in multi conditional if statements. 
-- set condition result in 0ADA and 0AD8 - -## 4.4.2 - -- fix eventual crash when using the game's user track player radio station (see https://github.com/cleolibrary/CLEO4/issues/38 for details) -- fix 0AAA opcode not working with scripts from main.scm - -## 4.4.1 - -- fix some issues with audio stream output #61 (by @GeTechG) -- compatibility with latest plugin-sdk - -## 4.4.0 - -- Dropped Windows XP support - -## 4.3.24 - -- Added the export of some functions required for new version of the CLEO+ plugin, and can be used in other plugins: CLEO_GetScriptTextureById, CLEO_GetInternalAudioStream, CLEO_CreateCustomScript, CLEO_GetLastCreatedCustomScript, CLEO_AddScriptDeleteDelegate, CLEO_RemoveScriptDeleteDelegate. -- Fixed sounds not pausing when unfocus (thanks to dkluin). -- Opcodes for finding entities (0AE1, 0AE2, 0AE3) now use a distance check with optimized performance, and ignore the distance limitation if the argument sent is greater than 1000.0. -- Opcode for finding peds (0AE1) now makes it possible to send "-1" in the "pass_deads" parameter to ignore all checks and return literally all peds. -- Opcode for car number of gears (0AB7) now returns from vehicle class itself instead of using model and handling arrays — now compatible with f92la and IndieVehicles. -- Opcodes for blip target coordinates (0AB6), car name (0ADB) and spawn (0ADD) are now compatible with f92la. -- Now the full version is shown in the SDK and menu text. - -## 4.3.23 - -- Now you can use string pointer in the file address parameter for .ini files opcodes. -- Fixed the 0ABA opcode causing heap corruption. -- Fixed shared variables not reset correctly. Which caused malfunctions in mods that use them and you play a new game or load game in a slot without the variables. 
- -## 4.3.22 - -- Now creates cleo, cleo/cleo_saves and cleo/cleo_text directories on startup if they do not exist -- Fix to issue with 0AE9 not returning result - -## 4.3.21 - -- Fixed operand type IDs in CLEO.h -- Added 'extern' to variables declared in CLEO.h -- Fix to issue with 0AB1 in missions not storing mission locals - -## 4.3.19-20 - -- Fixed issue with 0AB1 passing incorrect variable scope in missions -- Updated SDK version - -## 4.3.17-18 - -- Fixed potential future problem with 0AB0 which used methods with undefined behaviour -- Fixed incorrect method used for 0AB7 - -## 4.3.16 - -- Fixed bugs with CLEO saves when saved scripts ended -- Prevented crashing when invalid audiostream handles are used - -## 4.3.15 - -- Improvemed compatibility fix for opcodes 0AE1, 0AE2 and 0AE3 with incorrect find_next usage - -## 4.3.14 - -- Fixed 0AAA only returning custom scripts -- Fixed many things which use the 'SCM Block' or 'Mission Local Storage' space -- Fixed parameters being passed to script local storage instead of mission local storage through 0A94 -- Fixed potential problems with iteration through the script queues (may cause rare and hard to trace bugs) - -## 4.3.13 - -- Fixed crashing when starting a new game after a game has already started with CLEO scripts installed -- Possibly fixed other issues with starting a game with CLEO scripts installed - -## 4.3.12.1 - -- Un-did the 'Scripts no longer load prematurely' fix as it caused scripts to not load certain circumstances (like before CLEO 4) -- Included 'cleo_text' folder in installation - -## 4.3.12 - -- Fixed string parameter skipping in 'SkipOpcodeParams' used by CLEO plugins -- 0AC8 now returns a NULL value to the output var if allocation failed (as it did before 4.3a) -- 0AC9 now checks the memory was allocated by 0AC8 before attempting to free it -- FXT references are now case insensitive (as they were before 4.3a) -- File operations now check the input handle isn't null (as it seems was the way 
before 4.3a) -- 'Loaded mission' status now reset on new/loaded game (as it was before 4.3a) -- Scripts no longer load prematurely (like before 4.3a) -- Resolved conflicts with other menu hooks such as 'HUME' -- Other minor tweaks - -## 4.3.11 - -- Fixed crash with 0ADA in scripts beginning with an opcode ending in '00' - -## 4.3.10 - -- Improvements to opcodes 0AE1, 0AE2 and 0AE3 - now loops around the pool even when the 'find_next' flag isn't used correctly -- Fixed 0AD2 not returning peds targetted with the mouse, while targetting with a pad worked - -## 4.3.9 - -- Will now be able to start a CLEO mission after recently finishing a standard mission -- Will no longer error & terminate when scripts fail to open and instead simply log the error -- Will no longer terminate on warnings -- No longer includes paths in automatically generated script names (e.g. cleo\dir\demo.cs is now named 'demo.cs' and not 'dir\dem') -- Improved handling of script load errors - -## 4.3.8 - -- Fixed crash which would occur when missions were ended with 004E - -## 4.3.7 - -- Custom missions launched by CLEO scripts now inherit their compatibility mode - possibly fixing incompatibilities with mods using custom missions -- The current directory set by 0A99 is now script-dependant and only affects running CLEO scripts (not the entire game or the main.scm) -- Text and texture/sprite draws are now script-dependant (doesn't affect main.scm scripts) - -## 4.3.0 - -- Replaced code which dynamically allocated and deallocated memory for script parameters every time 0AA5-0AA8 were called with static arrays -- Removed a script execution loop replacement which wasn't used for anything important and weirdly only worked with 1.0US that caused crashes with script logging plugins -- Added support for Steam (v3) versions of gta_sa.exe -- Prevented the local storage from being initialized in SCM functions when the script is in CLEO 3 compatibility mode ('.cs3' extension) - -### Updates to behaviour of 
following opcodes: - -#### 0A99 - -CHANGE_DIRECTORY can now correctly change to the program directory - -#### 0A9A - -OPEN_FILE now uses a 'legacy' mode when passing an integer as the mode parameter for compatibility of CLEO file handles and SA file handles -Note that you should really not pass CLEO file handles to game functions. However, this legacy mode now ensures that the handles are compatible. -Other file functions have also been updated ensuring that game file handles are passed to relevant game functions. -It is recommended to not rely on passing files to game functions and instead use CLEO 4's in-built file functions in future. - -#### 0AD1 - -CALL now accepts string input, which is passed as a string pointer following string convention - -#### 0AD4 - -SCAN_STRING now returns a condition result - -#### 0AE6 - -FIND_FIRST_FILE now accepts string array output - -#### 0AE3 - -FIND_ALL_RANDOM_OBJECTS_IN_SPHERE now ensures no fading objects are returned and returns -1 instead of 0 on failure - -#### 0AE2 - -FIND_ALL_RANDOM_CARS_IN_SPHERE now ensures no script vehicles or fading vehicle are returned and returns -1 instead of 0 on failure - -#### 0AE1 - -FIND_ALL_RANDOM_CHARS_IN_SPHERE now ensures no script characters or fading characters are returned and returns -1 instead of 0 on failure - -#### 0ADF - -ADD_TEXT_LABEL now updates existing text labels if they already exist - -#### 0AD6 - -IS_END_OF_FILE_REACHED now returns true if a file error occured - -#### 0AD2 - -GET_CHAR_PLAYER_IS_TARGETING now returns -1 instead of 0 when no target is found - -#### 0AB5 - -STORE_CLOSEST_ENTITIES now ensures no script entities or fading entities are returned and ensures the player ped is not returned +For older changes, see [CLEO4 changelog](https://github.com/cleolibrary/CLEO4/blob/master/CHANGELOG.md) From 7b17b200c1192151b29f48c7e385419512b8f07e Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Thu, 26 Oct 2023 19:37:26 +0200 Subject: [PATCH 045/216] Add callbacks for 
opcode processing events (#7) --- cleo_sdk/CLEO.h | 5 ++- source/CCustomOpcodeSystem.cpp | 70 ++++++++++++++++++++++------------ 2 files changed, 49 insertions(+), 26 deletions(-) diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index a2835139..01fa7b09 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -131,6 +131,8 @@ enum class eCallbackId : DWORD ScriptRegister, // void WINAPI OnScriptRegister(CRunningScript* pScript); // called after script creation ScriptUnregister, // void WINAPI OnScriptUnregister(CRunningScript* pScript); // called before script deletion ScriptProcess, // bool WINAPI OnScriptProcess(CRunningScript* pScript); // return false to skip this script processing + ScriptOpcodeProcess, // OpcodeResult WINAPI OnScriptOpcodeProcess(CRunningScript* pScript, DWORD opcode); // return other than OR_NONE to signal that opcode was handled in the callback + ScriptOpcodeProcessFinished, // OpcodeResult WINAPI OnScriptOpcodeProcessFinished(CRunningScript* pScript, DWORD opcode, OpcodeResult result); // return other than OR_NONE to overwrite original result ScriptDraw, // void WINAPI OnScriptDraw(bool beforeFade); DrawingFinished, // void WINAPI OnDrawingFinished(); // called after game rendered everything and before presenting screen buffer Log, // void OnLog(eLogLevel level, const char* msg); @@ -275,9 +277,10 @@ static_assert(sizeof(CRunningScript) == 0xE0, "Invalid size of CRunningScript!") enum OpcodeResult : char { + OR_NONE = -2, + OR_ERROR = -1, OR_CONTINUE = 0, OR_INTERRUPT = 1, - OR_ERROR = -1, }; typedef OpcodeResult (CALLBACK* _pOpcodeHandler)(CRunningScript*); diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index fb858900..01fa355d 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -165,45 +165,65 @@ namespace CLEO { // opcode handler for custom opcodes OpcodeResult __fastcall CCustomOpcodeSystem::customOpcodeHandler(CRunningScript *thread, int dummy, WORD opcode) { - 
/*std::ostringstream ss; - ss << thread->GetName() << " opcode " << opcodeToStr(opcode) << std::endl; - OutputDebugStringA(ss.str().c_str());//*/ - lastScript = thread; lastOpcode = opcode; lastOpcodePtr = (WORD*)thread->GetBytePointer() - 1; // rewind to the opcode start - if(opcode > LastCustomOpcode) + // execute registered callbacks + OpcodeResult result = OR_NONE; + for (void* func : GetInstance().GetCallbacks(eCallbackId::ScriptOpcodeProcess)) { - SHOW_ERROR("Opcode [%04X] out of supported range! \nCalled in script %s\nScript suspended.", opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return ErrorSuspendScript(thread); + typedef OpcodeResult WINAPI callback(CRunningScript*, DWORD); + result = ((callback*)func)(thread, opcode); + + if(result != OR_NONE) + break; // processed } - CustomOpcodeHandler handler = customOpcodeProc[opcode]; - if(handler != nullptr) + if(result == OR_NONE) // opcode not proccessed yet { - lastCustomOpcode = opcode; - return handler(thread); - } + if(opcode > LastCustomOpcode) + { + SHOW_ERROR("Opcode [%04X] out of supported range! \nCalled in script %s\nScript suspended.", opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return ErrorSuspendScript(thread); + } - // Not registered as custom opcode. Call game's original handler + CustomOpcodeHandler handler = customOpcodeProc[opcode]; + if(handler != nullptr) + { + lastCustomOpcode = opcode; + return handler(thread); + } - if (opcode > LastOriginalOpcode) - { - SHOW_ERROR("Opcode [%04X] not registered! \nCalled in script %s\nScript suspended.", opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return ErrorSuspendScript(thread); - } + // Not registered as custom opcode. Call game's original handler - size_t tableIdx = opcode / 100; // 100 opcodes peer handler table - auto result = originalOpcodeHandlers[tableIdx](thread, opcode); + if (opcode > LastOriginalOpcode) + { + SHOW_ERROR("Opcode [%04X] not registered! 
\nCalled in script %s\nScript suspended.", opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return ErrorSuspendScript(thread); + } + + size_t tableIdx = opcode / 100; // 100 opcodes peer handler table + result = originalOpcodeHandlers[tableIdx](thread, opcode); - if(result == OR_ERROR) + if(result == OR_ERROR) + { + SHOW_ERROR("Opcode [%04X] not found! \nCalled in script %s\nScript suspended.", opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return ErrorSuspendScript(thread); + } + } + + // execute registered callbacks + OpcodeResult callbackResult = OR_NONE; + for (void* func : GetInstance().GetCallbacks(eCallbackId::ScriptOpcodeProcessFinished)) { - SHOW_ERROR("Opcode [%04X] not found! \nCalled in script %s\nScript suspended.", opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return ErrorSuspendScript(thread); + typedef OpcodeResult WINAPI callback(CRunningScript*, DWORD, OpcodeResult); + auto res = ((callback*)func)(thread, opcode, result); + + callbackResult = max(res, callbackResult); // store result with highest value from all callbacks } - return result; + return (callbackResult != OR_NONE) ? callbackResult : result; } OpcodeResult CCustomOpcodeSystem::ErrorSuspendScript(CRunningScript* thread) @@ -221,7 +241,7 @@ namespace CLEO { for (void* func : GetInstance().GetCallbacks(eCallbackId::ScriptsFinalize)) { - typedef void callback(void); + typedef void WINAPI callback(void); ((callback*)func)(); } From ba095df720f3fdf9a8bcd3bb83ca74937b075c83 Mon Sep 17 00:00:00 2001 From: Seemann Date: Fri, 27 Oct 2023 13:04:55 -0400 Subject: [PATCH 046/216] Update CHANGELOG.md (#10) --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cb65fb15..5398a22c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ - **2001 ([get_script_filename](https://library.sannybuilder.com/#/sa/CLEO/2001))** - 'argument count' parameter of **0AB1 (cleo_call)** is now optional. 
`cleo_call @LABEL args 0` can be written as `cleo_call @LABEL` - 'argument count' parameter of **0AB2 (cleo_return)** is now optional. `cleo_return 0` can be written as `cleo_return` - - opcodes **0AAB**, **0AE4**, **0AE5**, **0AE1**, **0AE2** and **0AE3** moved to the [FileSystemOperations](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/FileSystemOperations) plugin + - opcodes **0AAB**, **0AE4**, **0AE5**, **0AE6**, **0AE7** and **0AE8** moved to the [FileSystemOperations](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/FileSystemOperations) plugin - changes in file operations - file paths can now use 'virtual absolute paths'. Use prefix in file path strings to access predefined locations: - `0:\` for _game root_ directory From 7f674cdeb362acbaa4c8e44f9fd6b6cb3dd5d956 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Fri, 27 Oct 2023 21:25:55 +0200 Subject: [PATCH 047/216] Fix for backward compatibility with mod loader (#9) * fix for backward compatibility with ModLoader * Fix for crash when script creation from file fails. 
--- source/CScriptEngine.cpp | 24 +++++++++++++++--------- source/FileEnumerator.h | 36 +++++++++++++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 10 deletions(-) diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 640de6fb..6e5cd61a 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -983,27 +983,33 @@ namespace CLEO } // [game root]\cleo - std::string scriptsDir = CFileMgr::ms_rootDirName; + /*std::string scriptsDir = CFileMgr::ms_rootDirName; if (!scriptsDir.empty() && scriptsDir.back() != '\\') scriptsDir.push_back('\\'); - scriptsDir += "cleo"; + scriptsDir += "cleo";*/ + std::string scriptsDir = "cleo"; // TODO: restore to absolute path when ModLoader is updated to support CLEO5 TRACE("Searching for cleo scripts"); - FilesWalk(scriptsDir.c_str(), cs_ext, [this](const char* fullPath, const char* filename) { - auto cs = LoadScript(fullPath); - cs->SetDebugMode(NativeScriptsDebugMode); // inherit from global state + CCustomScript* cs = nullptr; + FilesWalk(scriptsDir.c_str(), cs_ext, [&](const char* fullPath, const char* filename) { + cs = LoadScript(fullPath); }); - FilesWalk(scriptsDir.c_str(), cs4_ext, [this](const char* fullPath, const char* filename) { - auto cs = LoadScript(fullPath); + FilesWalk(scriptsDir.c_str(), cs4_ext, [&](const char* fullPath, const char* filename) { + cs = LoadScript(fullPath); if (cs) cs->SetCompatibility(CLEO_VER_4); }); - FilesWalk(scriptsDir.c_str(), cs3_ext, [this](const char* fullPath, const char* filename) { - auto cs = LoadScript(fullPath); + FilesWalk(scriptsDir.c_str(), cs3_ext, [&](const char* fullPath, const char* filename) { + cs = LoadScript(fullPath); if (cs) cs->SetCompatibility(CLEO_VER_3); }); + if (cs != nullptr) + { + cs->SetDebugMode(NativeScriptsDebugMode); // inherit from global state + } + for (void* func : GetInstance().GetCallbacks(eCallbackId::ScriptsLoaded)) { typedef void WINAPI callback(void); diff --git a/source/FileEnumerator.h 
b/source/FileEnumerator.h index de43c605..03d442b7 100644 --- a/source/FileEnumerator.h +++ b/source/FileEnumerator.h @@ -4,7 +4,7 @@ template void FilesWalk(const char* directory, const char* extension, T callback) { - try + /*try { for (auto& it : std::filesystem::directory_iterator(directory)) { @@ -28,5 +28,39 @@ void FilesWalk(const char* directory, const char* extension, T callback) catch (const std::exception& ex) { TRACE("Error while iterating directory: %s", ex.what()); + }*/ + + // Re-implemented with raw search APIs for compatibility with ModLoader. + // The ModLoader should be updated anyway to solve potential file access problems in more advanced Cleo scripts + + std::string pattern = directory; + if(!pattern.empty() && pattern.back() != '\\') pattern.push_back('\\'); + + std::string_view baseDir = pattern; + + pattern.push_back('*'); + if (extension != nullptr) pattern.append(extension); + + WIN32_FIND_DATA wfd = { 0 }; + HANDLE hSearch = FindFirstFile(pattern.c_str(), &wfd); + + if (hSearch == INVALID_HANDLE_VALUE) + { + TRACE("No files found in: %s", pattern.c_str()); + return; } + do + { + if (wfd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) + { + continue; // skip directories + } + + //auto result = std::filesystem::absolute(std::string(baseDir) + wfd.cFileName); + auto result = std::filesystem::path(std::string(baseDir) + wfd.cFileName); // ModLoader supports only relative paths... + callback(result.string().c_str(), result.filename().string().c_str()); + + } while (FindNextFile(hSearch, &wfd)); + + FindClose(hSearch); } From 91cdc4f55bde582ae6c60338a2f3727a2b9cf41e Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sat, 28 Oct 2023 04:22:22 +0200 Subject: [PATCH 048/216] Paths resolving updates (#12) * Opcode 2001 returning only resolved paths. * Updates in path resolving. Added support for parent directory references "..\" in paths. Replaced virtual path numbers with text keywords. 
--- cleo_sdk/CLEO.h | 10 ++-- source/CCustomOpcodeSystem.cpp | 46 ++++++++++++----- source/CScriptEngine.cpp | 92 +++++++++++++++++++--------------- source/FileEnumerator.h | 3 +- 4 files changed, 92 insertions(+), 59 deletions(-) diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index 01fa7b09..bf72d10e 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -114,11 +114,11 @@ static eLogicalOperation& operator--(eLogicalOperation& o) } // CLEO virtual path prefixes. Expandable with CLEO_ResolvePath -const char DIR_GAME[] = "0:"; // game root directory -const char DIR_USER[] = "1:"; // game save directory -const char DIR_SCRIPT[] = "2:"; // current script directory -const char DIR_CLEO[] = "3:"; // game\cleo directory -const char DIR_MODULES[] = "4:"; // game\cleo\modules directory +const char DIR_GAME[] = "root:"; // game root directory +const char DIR_USER[] = "userfiles:"; // game save directory +const char DIR_SCRIPT[] = "."; // current script directory +const char DIR_CLEO[] = "cleo:"; // game\cleo directory +const char DIR_MODULES[] = "modules:"; // game\cleo\modules directory // argument of CLEO_RegisterCallback enum class eCallbackId : DWORD diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 01fa355d..1ecf69d2 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -1485,15 +1485,29 @@ namespace CLEO { OpcodeResult __stdcall opcode_0A99(CRunningScript *thread) { auto paramType = *thread->GetBytePointer(); - if (paramType >= 1 && paramType <= 8) + if (paramType == DT_BYTE || + paramType == DT_WORD || + paramType == DT_DWORD || + paramType == DT_VAR || + paramType == DT_LVAR || + paramType == DT_VAR_ARRAY || + paramType == DT_LVAR_ARRAY) { // numbered predefined paths - DWORD param; - *thread >> param; + DWORD param; *thread >> param; + + const char* path; + switch(param) + { + case 0: path = DIR_GAME; break; + case 1: path = DIR_USER; break; + case 2: path = DIR_SCRIPT; break; + default: + 
LOG_WARNING("Value (%d) not known by opcode [0A99] in script %s", param, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return OR_CONTINUE; + } - std::string path = std::to_string(param); - path += ":"; - reinterpret_cast(thread)->SetWorkDir(path.c_str()); + reinterpret_cast(thread)->SetWorkDir(path); } else { @@ -2032,11 +2046,8 @@ namespace CLEO { break; default: - { SHOW_ERROR("Invalid type (%02X) of the first argument in opcode [0AB1] in script %s \nScript suspended.", *thread->GetBytePointer(), ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); - } } ScmFunction* scmFunc = new ScmFunction(thread); @@ -3007,9 +3018,20 @@ namespace CLEO { if(fullPath != 0) { - std::ostringstream ss; - ss << script->GetScriptFileDir() << "\\" << script->GetScriptFileName(); - CLEO_WriteStringOpcodeParam(thread, ss.str().c_str()); + const size_t len = + strlen(script->GetScriptFileDir()) + + 1 + // path separator + strlen(script->GetScriptFileName()); + + std::string path; + path.reserve(len); + + path = script->GetScriptFileDir(); + path.push_back('\\'); + path.append(script->GetScriptFileName()); + path = script->ResolvePath(path.c_str()); // real absolute path + + CLEO_WriteStringOpcodeParam(thread, path.c_str()); } else CLEO_WriteStringOpcodeParam(thread, script->GetScriptFileName()); diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 6e5cd61a..8b10064a 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -665,57 +665,69 @@ namespace CLEO return {}; } - std::string result; - if (strlen(path) < 2 || path[1] != ':') // does not start with drive letter + try { - result = (customWorkDir != nullptr) ? 
customWorkDir : GetWorkDir(); - if (!result.empty() && result.back() == '\\') result.pop_back(); + auto fsPath = std::filesystem::path(path); - if (strlen(path) > 0) + // check for virtual path root + enum class VPref{ None, Game, User, Script, Cleo, Modules } virtualPrefix = VPref::None; + auto root = fsPath.begin(); + if(root != fsPath.end()) { - if(!result.empty()) result.push_back('\\'); - result.append(path); + if(*root == DIR_GAME) virtualPrefix = VPref::Game; + else if (*root == DIR_USER) virtualPrefix = VPref::User; + else if (*root == DIR_SCRIPT) virtualPrefix = VPref::Script; + else if (*root == DIR_CLEO) virtualPrefix = VPref::Cleo; + else if (*root == DIR_MODULES) virtualPrefix = VPref::Modules; } - } - else - { - result = path; - } - // predefined CLEO paths starting with '[digit]:' - if (result.length() < 2 || result[1] != ':' || - result[0] < DIR_GAME[0] || result[0] > DIR_MODULES[0]) // supported range - { - return result; // not predefined path prefix found - } + // not virtual + if(virtualPrefix == VPref::None) + { + if(fsPath.is_relative()) + { + auto workDir = ResolvePath(GetWorkDir()); + fsPath = workDir / fsPath; + } - if (result[0] == DIR_USER[0]) // saves/settings location - { - return std::string(GetUserDirectory()) + &result[2]; // original path without '1:' prefix; - } + return std::filesystem::weakly_canonical(fsPath).string(); + } - if (result[0] == DIR_SCRIPT[0]) // current script location - { - std::string resolved = ResolvePath(GetScriptFileDir()); - resolved += &result[2]; // original path without '2:' prefix; - return resolved; - } + // expand virtual paths + std::filesystem::path resolved; - // game root directory - std::string resolved = CFileMgr::ms_rootDirName; - if(!resolved.empty() && resolved.back() == '\\') resolved.pop_back(); + if (virtualPrefix == VPref::User) // user files location + { + resolved = GetUserDirectory(); + } + else + if (virtualPrefix == VPref::Script) // this script's source file location + { + resolved = 
ResolvePath(GetScriptFileDir()); + } + else + { + // all remaing variants starts with game root + resolved = std::filesystem::path(CFileMgr::ms_rootDirName); + + switch(virtualPrefix) + { + case(VPref::Cleo): resolved /= "cleo"; break; + case(VPref::Modules): resolved /= "cleo\\cleo_modules"; break; + } + } - if (result[0] == DIR_CLEO[0]) // cleo directory - { - resolved += "\\cleo"; + // append all but virtual prefix from original path + for(auto it = ++fsPath.begin(); it != fsPath.end(); it++) + resolved /= *it; + + return std::filesystem::weakly_canonical(resolved).string(); // collapse "..\" uses } - else if (result[0] == DIR_MODULES[0]) // cleo modules directory + catch (const std::exception& ex) { - resolved += "\\cleo\\cleo_modules"; + TRACE("Error while resolving path: %s", ex.what()); + return {}; } - - resolved += &result[2]; // original path without 'X:' prefix - return resolved; } std::string CCustomScript::GetInfoStr(bool currLineInfo) const @@ -1299,7 +1311,7 @@ namespace CLEO // store script file directory and name std::filesystem::path path = szFileName; - path = std::filesystem::absolute(path); + path = std::filesystem::weakly_canonical(path); scriptFileDir = path.parent_path().string(); scriptFileName = path.filename().string(); diff --git a/source/FileEnumerator.h b/source/FileEnumerator.h index 03d442b7..e54188d9 100644 --- a/source/FileEnumerator.h +++ b/source/FileEnumerator.h @@ -56,8 +56,7 @@ void FilesWalk(const char* directory, const char* extension, T callback) continue; // skip directories } - //auto result = std::filesystem::absolute(std::string(baseDir) + wfd.cFileName); - auto result = std::filesystem::path(std::string(baseDir) + wfd.cFileName); // ModLoader supports only relative paths... + auto result = std::filesystem::weakly_canonical(std::string(baseDir) + wfd.cFileName); // will use CWD if input path was relative! 
callback(result.string().c_str(), result.filename().string().c_str()); } while (FindNextFile(hSearch, &wfd)); From 1ff1b4b6261c7b1588b7fc40ce3fe8862682891d Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sun, 29 Oct 2023 16:04:55 +0100 Subject: [PATCH 049/216] Error messages update (#13) --- cleo_sdk/CLEO.h | 65 ++++++++ source/CCustomOpcodeSystem.cpp | 288 ++++++++++++++++++++------------- source/CCustomOpcodeSystem.h | 7 +- source/CDebug.cpp | 39 ++++- source/CDebug.h | 2 + source/CScriptEngine.cpp | 4 +- source/CleoBase.cpp | 1 + 7 files changed, 291 insertions(+), 115 deletions(-) diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index bf72d10e..eef1eb44 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -66,6 +66,71 @@ enum eDataType : int DT_VAR_STRING_ARRAY, DT_LVAR_STRING_ARRAY }; +static const char* ToStr(eDataType type) +{ + switch (type) + { + case DT_END: return "VArgEnd"; break; + case DT_DWORD: return "Int32"; break; + case DT_VAR: return "GlobVar"; break; + case DT_LVAR: return "LocVar"; break; + case DT_BYTE: return "Int8"; break; + case DT_WORD: return "Int16"; break; + case DT_FLOAT: return "Float32"; break; + case DT_VAR_ARRAY: return "GlobVarArr"; break; + case DT_LVAR_ARRAY: return "LocVarArr"; break; + case DT_TEXTLABEL: return "STxt"; break; + case DT_VAR_TEXTLABEL: return "GlobVarSTxt"; break; + case DT_LVAR_TEXTLABEL: return "LocVarSTxt"; break; + case DT_VAR_TEXTLABEL_ARRAY: return "GlobVarSTxtArr"; break; + case DT_LVAR_TEXTLABEL_ARRAY: return "LocVarSTxtArr"; break; + case DT_VARLEN_STRING: return "Txt"; break; + case DT_STRING: return "LTxt"; break; + case DT_VAR_STRING: return "GlobVarLTxt"; break; + case DT_LVAR_STRING: return "LocVarLTxt"; break; + case DT_VAR_STRING_ARRAY: return "GlobVarLTxtArr"; break; + case DT_LVAR_STRING_ARRAY: return "LocVarLTxtArr"; break; + default: return "corrupted"; + } +} +static const char* ToKindStr(eDataType type) +{ + switch (type) + { + case DT_BYTE: + case DT_WORD: + case DT_DWORD: + return "int"; 
break; + + case DT_FLOAT: + return "float"; break; + + case DT_STRING: + case DT_TEXTLABEL: + case DT_LVAR_TEXTLABEL: + case DT_LVAR_TEXTLABEL_ARRAY: + case DT_LVAR_STRING: + case DT_LVAR_STRING_ARRAY: + case DT_VAR_TEXTLABEL: + case DT_VAR_TEXTLABEL_ARRAY: + case DT_VAR_STRING: + case DT_VAR_STRING_ARRAY: + case DT_VARLEN_STRING: + return "string"; break; + + case DT_VAR: + case DT_VAR_ARRAY: + case DT_LVAR: + case DT_LVAR_ARRAY: + return "variable"; break; + + case DT_END: + return "varArgEnd"; break; + + default: + return "corrupted"; break; + } +} const size_t MAX_STR_LEN = 0xff; // max length of string type parameter diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 1ecf69d2..2672c1cb 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -9,7 +9,12 @@ #include #include -namespace CLEO { +#define OPCODE_VALIDATE_STR_ARG_READ(x) if((void*)x == nullptr) { SHOW_ERROR("%s in script %s \nScript suspended.", lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } +#define OPCODE_VALIDATE_STR_ARG_WRITE(x) if((void*)x == nullptr) { SHOW_ERROR("%s in script %s \nScript suspended.", lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } +#define OPCODE_READ_FORMATTED_STRING(thread, buf, bufSize, format) if(ReadFormattedString(thread, buf, bufSize, format) == -1) { SHOW_ERROR("%s in script %s \nScript suspended.", lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } + +namespace CLEO +{ DWORD FUNC_fopen; DWORD FUNC_fclose; DWORD FUNC_fwrite; @@ -161,10 +166,15 @@ namespace CLEO { WORD CCustomOpcodeSystem::lastOpcode = 0; WORD* CCustomOpcodeSystem::lastOpcodePtr = nullptr; WORD CCustomOpcodeSystem::lastCustomOpcode = 0; + std::string lastErrorMsg = {}; + WORD CCustomOpcodeSystem::prevOpcode = 
0; + // opcode handler for custom opcodes OpcodeResult __fastcall CCustomOpcodeSystem::customOpcodeHandler(CRunningScript *thread, int dummy, WORD opcode) { + prevOpcode = lastOpcode; + lastScript = thread; lastOpcode = opcode; lastOpcodePtr = (WORD*)thread->GetBytePointer() - 1; // rewind to the opcode start @@ -465,7 +475,7 @@ namespace CLEO { break; default: - LOG_WARNING("Reading integer from invalid argument type (%02X) in script %s", paramType, ((CCustomScript*)&thread)->GetInfoStr().c_str()); + LOG_WARNING("Reading integer argument, got %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); } GetScriptParams(&thread, 1); @@ -489,7 +499,7 @@ namespace CLEO { break; default: - LOG_WARNING("Writing integer into invalid argument type (%02X) in script %s", paramType, ((CCustomScript*)&thread)->GetInfoStr().c_str()); + LOG_WARNING("Writing integer, got argument type %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); } opcodeParams[0].dwParam = uval; @@ -513,7 +523,7 @@ namespace CLEO { break; default: - LOG_WARNING("Reading integer from invalid argument type (%02X) in script %s", paramType, ((CCustomScript*)&thread)->GetInfoStr().c_str()); + LOG_WARNING("Reading integer argument, got %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); } GetScriptParams(&thread, 1); @@ -537,7 +547,7 @@ namespace CLEO { break; default: - LOG_WARNING("Writing integer into invalid argument type (%02X) in script %s", paramType, ((CCustomScript*)&thread)->GetInfoStr().c_str()); + LOG_WARNING("Writing integer, got argument type %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); } opcodeParams[0].nParam = nval; @@ -558,7 +568,7 @@ namespace CLEO { break; default: - LOG_WARNING("Reading float from invalid argument type (%02X) in script %s", paramType, ((CCustomScript*)&thread)->GetInfoStr().c_str()); + LOG_WARNING("Reading float argument, 
got %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); } GetScriptParams(&thread, 1); @@ -578,7 +588,7 @@ namespace CLEO { break; default: - LOG_WARNING("Writing float into invalid argument type (%02X) in script %s", paramType, ((CCustomScript*)&thread)->GetInfoStr().c_str()); + LOG_WARNING("Writing float, got argument type %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); } opcodeParams[0].fParam = fval; @@ -640,6 +650,9 @@ namespace CLEO { { static char internal_buf[MAX_STR_LEN]; if (!buf) { buf = internal_buf; bufSize = MAX_STR_LEN; } + const auto bufLength = bufSize ? bufSize - 1 : 0; // max text length (minus terminator char) + + lastErrorMsg.clear(); auto paramType = CLEO_GetOperandType(thread); switch(paramType) @@ -652,23 +665,27 @@ namespace CLEO { case DT_LVAR_ARRAY: { GetScriptParams(thread, 1); + + if(opcodeParams[0].dwParam <= CCustomOpcodeSystem::MinValidAddress) + { + lastErrorMsg = (opcodeParams[0].dwParam == 0) ? 
+ "Reading string from 'null' pointer argument" : + stringPrintf("Reading string from invalid '0x%X' pointer argument", opcodeParams[0].dwParam); + + return nullptr; // error, target buffer untouched + } + char* str = opcodeParams[0].pcParam; + auto length = strlen(str); - size_t length; - if(str != nullptr) - length = strlen(str); - else + if(length > bufLength) { - length = 0; - LOG_WARNING("Reading string from null pointer in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); + lastErrorMsg = stringPrintf("Target buffer too small (%d) to read whole string (%d) from argument", bufLength, length); + length = bufLength; // clamp to target buffer size } - if(bufSize > 0) - length = min(length, bufSize - 1); // minus terminator char - else - length = 0; // no target buffer - if (length) strncpy(buf, str, length); + if (bufSize > 0) buf[length] = '\0'; // string terminator return buf; } @@ -701,10 +718,11 @@ namespace CLEO { char* str = (char*)thread->GetBytePointer(); thread->IncPtr(length); // text data - if (bufSize > 0) - length = min(length, bufSize - 1); // minus terminator char - else - length = 0; // no target buffer + if (length > bufLength) + { + lastErrorMsg = stringPrintf("Target buffer too small (%d) to read whole string (%d) from argument", bufLength, length); + length = bufLength; // clamp to target buffer size + } if (length) strncpy(buf, str, length); if (bufSize > 0) buf[length] = '\0'; // string terminator @@ -721,8 +739,8 @@ namespace CLEO { // unsupported param type GetScriptParams(thread, 1); // skip unhandled param - SHOW_ERROR("Reading string from invalid argument type (%02X) in script %s", paramType, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return nullptr; + lastErrorMsg = stringPrintf("Reading string argument, got %s", ToKindStr(paramType)); + return nullptr; // error, target buffer untouched } // write output\result string parameter @@ -749,6 +767,8 @@ namespace CLEO { char* targetBuff; DWORD targetSize; + 
lastErrorMsg.clear(); + auto paramType = CLEO_GetOperandType(thread); switch(paramType) { @@ -759,6 +779,12 @@ namespace CLEO { case DT_VAR_ARRAY: case DT_LVAR_ARRAY: GetScriptParams(thread, 1); + + if (opcodeParams[0].dwParam <= CCustomOpcodeSystem::MinValidAddress) + { + lastErrorMsg = stringPrintf("Writing string into invalid '0x%X' pointer argument", opcodeParams[0].dwParam); + return { nullptr, 0 }; // error + } return { opcodeParams[0].pcParam, 0x7FFFFFFF }; // user allocated memory block can be any size // short string variable @@ -777,9 +803,9 @@ namespace CLEO { default: { - SHOW_ERROR("Outputing string into invalid argument type (%02X) in script %s", paramType, ((CCustomScript*)thread)->GetInfoStr().c_str()); + lastErrorMsg = stringPrintf("Writing string, got argument %s", ToKindStr(paramType)); CLEO_SkipOpcodeParams(thread, 1); // skip unhandled param - return { nullptr, 0 }; + return { nullptr, 0 }; // error } } } @@ -792,11 +818,14 @@ namespace CLEO { char* outIter = outputStr; char bufa[256], fmtbufa[64], *fmta; + lastErrorMsg.clear(); + // invalid input arguments - if(outputStr == nullptr || len == 0) + if(outputStr == nullptr || len == 0) { + lastErrorMsg = "Need target buffer to read formatted string"; SkipUnusedVarArgs(thread); - return -1; + return -1; // error } if(len > 1 && format != nullptr) @@ -874,14 +903,29 @@ namespace CLEO { { case 's': { - static const char none[] = "(null)"; if (CLEO_GetOperandType(thread) == DT_END) goto _ReadFormattedString_ArgMissing; - const char *astr = ReadStringParam(thread, bufa, sizeof(bufa)); - const char *striter = astr ? 
astr : none; - while (*striter) + + const char* str = ReadStringParam(thread, bufa, sizeof(bufa)); + if(str == nullptr) // read error + { + if(lastErrorMsg.find("'null' pointer") != std::string::npos) + { + static const char none[] = "(null)"; + str = none; + } + else + { + // lastErrorMsg already set by ReadStringParam + SkipUnusedVarArgs(thread); + outputStr[written] = '\0'; + return -1; // error + } + } + + while (*str) { if (written++ >= len) goto _ReadFormattedString_OutOfMemory; - *outIter++ = *striter++; + *outIter++ = *str++; } iter++; break; @@ -941,28 +985,30 @@ namespace CLEO { if (written >= len) { - _ReadFormattedString_OutOfMemory: // jump here on error - LOG_WARNING("Read formatted string error: Insufficient output buffer size in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); + _ReadFormattedString_OutOfMemory: // jump here on error + + lastErrorMsg = stringPrintf("Target buffer too small (%d) to read whole formatted string", len); SkipUnusedVarArgs(thread); outputStr[len - 1] = '\0'; - return -1; + return -1; // error } // still more var-args available if (CLEO_GetOperandType(thread) != DT_END) { - LOG_WARNING("Read formatted string: Found more params than format slots in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); + lastErrorMsg = "More params than slots in formatted string"; + LOG_WARNING("%s in script %s", lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); } SkipUnusedVarArgs(thread); // skip terminator too outputStr[written] = '\0'; return (int)written; - _ReadFormattedString_ArgMissing: // jump here on error - LOG_WARNING("Read formatted string: Not enough arguments to fulfill specified format in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); + _ReadFormattedString_ArgMissing: // jump here on error + lastErrorMsg = "Less params than slots in formatted string"; thread->IncPtr(); // skip vararg terminator outputStr[written] = '\0'; - return (int)written; + return -1; // error } // 
Legacy modes for CLEO 3 @@ -1383,7 +1429,9 @@ namespace CLEO { //0A92=-1,create_custom_thread %1d% OpcodeResult __stdcall opcode_0A92(CRunningScript *thread) { - auto filename = reinterpret_cast(thread)->ResolvePath(ReadStringParam(thread), DIR_CLEO); // legacy: default search location is game\cleo directory + auto path = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(path) + + auto filename = reinterpret_cast(thread)->ResolvePath(path, DIR_CLEO); // legacy: default search location is game\cleo directory TRACE("[0A92] Starting new custom script %s from thread named %s", filename.c_str(), thread->GetName()); auto cs = new CCustomScript(filename.c_str()); @@ -1421,7 +1469,9 @@ namespace CLEO { //0A94=-1,create_custom_mission %1d% OpcodeResult __stdcall opcode_0A94(CRunningScript *thread) { - auto filename = reinterpret_cast(thread)->ResolvePath(ReadStringParam(thread), DIR_CLEO); // legacy: default search location is game\cleo directory + auto path = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(path) + + auto filename = reinterpret_cast(thread)->ResolvePath(path, DIR_CLEO); // legacy: default search location is game\cleo directory filename += ".cm"; // add custom mission extension TRACE("[0A94] Starting new custom mission %s from thread named %s", filename.c_str(), thread->GetName()); @@ -1511,7 +1561,8 @@ namespace CLEO { } else { - reinterpret_cast(thread)->SetWorkDir(ReadStringParam(thread)); + auto path = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(path) + reinterpret_cast(thread)->SetWorkDir(path); } return OR_CONTINUE; } @@ -1519,7 +1570,9 @@ namespace CLEO { //0A9A=3,%3d% = openfile %1d% mode %2d% // IF and SET OpcodeResult __stdcall opcode_0A9A(CRunningScript *thread) { - auto filename = reinterpret_cast(thread)->ResolvePath(ReadStringParam(thread)); + auto path = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(path) + + auto filename = reinterpret_cast(thread)->ResolvePath(path); auto paramType = *thread->GetBytePointer(); 
char mode[0x10]; @@ -1541,8 +1594,8 @@ namespace CLEO { } else { - // string param - ReadStringParam(thread, mode, sizeof(mode)); + auto modeOk = ReadStringParam(thread, mode, sizeof(mode)); + OPCODE_VALIDATE_STR_ARG_READ(modeOk) } if (auto hfile = open_file(filename.c_str(), mode, bLegacyMode)) @@ -1642,7 +1695,9 @@ namespace CLEO { //0AA2=2,%2h% = load_library %1d% // IF and SET OpcodeResult __stdcall opcode_0AA2(CRunningScript *thread) { - auto filename = reinterpret_cast(thread)->ResolvePath(ReadStringParam(thread)); + auto path = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(path) + + auto filename = reinterpret_cast(thread)->ResolvePath(path); auto libHandle = LoadLibrary(filename.c_str()); *thread << libHandle; @@ -1665,7 +1720,8 @@ namespace CLEO { //0AA4=3,%3d% = get_proc_address %1d% library %2d% // IF and SET OpcodeResult __stdcall opcode_0AA4(CRunningScript *thread) { - char *funcName = ReadStringParam(thread); + auto funcName = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(funcName) + HMODULE libHandle; *thread >> libHandle; void *funcAddr = (void *)GetProcAddress(libHandle, funcName); @@ -1943,7 +1999,7 @@ namespace CLEO { //0AAA=2, %2d% = thread %1d% pointer // IF and SET OpcodeResult __stdcall opcode_0AAA(CRunningScript *thread) { - char *threadName = ReadStringParam(thread); + auto threadName = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(threadName) threadName[7] = '\0'; CRunningScript *cs = GetInstance().ScriptEngine.FindCustomScriptNamed(threadName); if (!cs) cs = GetInstance().ScriptEngine.FindScriptNamed(threadName); @@ -1955,7 +2011,8 @@ namespace CLEO { //0AAC=2, %2d% = load_audiostream %1d% // IF and SET OpcodeResult __stdcall opcode_0AAC(CRunningScript *thread) { - auto filename = reinterpret_cast(thread)->ResolvePath(ReadStringParam(thread)); + auto path = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(path) + auto filename = reinterpret_cast(thread)->ResolvePath(path); auto stream = 
GetInstance().SoundSystem.LoadStream(filename.c_str()); *thread << stream; @@ -2018,7 +2075,8 @@ namespace CLEO { int label = 0; char* moduleTxt = nullptr; - switch (*thread->GetBytePointer()) + auto paramType = CLEO_GetOperandType(thread); + switch (paramType) { // label of current script case DT_DWORD: @@ -2046,7 +2104,7 @@ namespace CLEO { break; default: - SHOW_ERROR("Invalid type (%02X) of the first argument in opcode [0AB1] in script %s \nScript suspended.", *thread->GetBytePointer(), ((CCustomScript*)thread)->GetInfoStr().c_str()); + SHOW_ERROR("Invalid type (%s) of the first argument in opcode [0AB1] in script %s \nScript suspended.", ToKindStr(paramType), ((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } @@ -2295,7 +2353,8 @@ namespace CLEO { //0ABA=1,end_custom_thread_named %1d% OpcodeResult __stdcall opcode_0ABA(CRunningScript *thread) { - char *threadName = ReadStringParam(thread); + auto threadName = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(threadName) + auto deleted_thread = GetInstance().ScriptEngine.FindCustomScriptNamed(threadName); if (deleted_thread) { @@ -2365,7 +2424,9 @@ namespace CLEO { //0AC1=2,%2d% = load_audiostream_with_3d_support %1d% //IF and SET OpcodeResult __stdcall opcode_0AC1(CRunningScript *thread) { - auto stream = GetInstance().SoundSystem.LoadStream(ReadStringParam(thread), true); + auto path = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(path) + + auto stream = GetInstance().SoundSystem.LoadStream(path, true); *thread << stream; SetScriptCondResult(thread, stream != nullptr); return OR_CONTINUE; @@ -2456,14 +2517,15 @@ namespace CLEO { //0ACA=1,show_text_box %1d% OpcodeResult __stdcall opcode_0ACA(CRunningScript *thread) { - PrintHelp(ReadStringParam(thread)); + auto text = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(text) + PrintHelp(text); return OR_CONTINUE; } //0ACB=3,show_styled_text %1d% time %2d% style %3d% OpcodeResult __stdcall 
opcode_0ACB(CRunningScript *thread) { - auto text = ReadStringParam(thread); + auto text = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(text) DWORD time; *thread >> time; DWORD style; *thread >> style; @@ -2474,7 +2536,7 @@ namespace CLEO { //0ACC=2,show_text_lowpriority %1d% time %2d% OpcodeResult __stdcall opcode_0ACC(CRunningScript *thread) { - auto text = ReadStringParam(thread); + auto text = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(text) DWORD time; *thread >> time; Print(text, time); @@ -2484,7 +2546,7 @@ namespace CLEO { //0ACD=2,show_text_highpriority %1d% time %2d% OpcodeResult __stdcall opcode_0ACD(CRunningScript *thread) { - auto text = ReadStringParam(thread); + auto text = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(text) DWORD time; *thread >> time; PrintNow(text, time); @@ -2494,8 +2556,8 @@ namespace CLEO { //0ACE=-1,show_formatted_text_box %1d% OpcodeResult __stdcall opcode_0ACE(CRunningScript *thread) { - auto format = ReadStringParam(thread); - char text[MAX_STR_LEN]; ReadFormattedString(thread, text, sizeof(text), format); + auto format = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(format) + char text[MAX_STR_LEN]; OPCODE_READ_FORMATTED_STRING(thread, text, sizeof(text), format) PrintHelp(text); return OR_CONTINUE; @@ -2504,10 +2566,10 @@ namespace CLEO { //0ACF=-1,show_formatted_styled_text %1d% time %2d% style %3d% OpcodeResult __stdcall opcode_0ACF(CRunningScript *thread) { - auto format = ReadStringParam(thread); + auto format = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(format) DWORD time; *thread >> time; DWORD style; *thread >> style; - char text[MAX_STR_LEN]; ReadFormattedString(thread, text, sizeof(text), format); + char text[MAX_STR_LEN]; OPCODE_READ_FORMATTED_STRING(thread, text, sizeof(text), format) PrintBig(text, time, style); return OR_CONTINUE; @@ -2516,9 +2578,9 @@ namespace CLEO { //0AD0=-1,show_formatted_text_lowpriority %1d% time %2d% OpcodeResult __stdcall 
opcode_0AD0(CRunningScript *thread) { - auto format = ReadStringParam(thread); + auto format = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(format) DWORD time; *thread >> time; - char text[MAX_STR_LEN]; ReadFormattedString(thread, text, sizeof(text), format); + char text[MAX_STR_LEN]; OPCODE_READ_FORMATTED_STRING(thread, text, sizeof(text), format) Print(text, time); return OR_CONTINUE; @@ -2527,9 +2589,9 @@ namespace CLEO { //0AD1=-1,show_formatted_text_highpriority %1d% time %2d% OpcodeResult __stdcall opcode_0AD1(CRunningScript *thread) { - auto format = ReadStringParam(thread); + auto format = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(format) DWORD time; *thread >> time; - char text[MAX_STR_LEN]; ReadFormattedString(thread, text, sizeof(text), format); + char text[MAX_STR_LEN]; OPCODE_READ_FORMATTED_STRING(thread, text, sizeof(text), format) PrintNow(text, time); return OR_CONTINUE; @@ -2559,10 +2621,9 @@ namespace CLEO { //0AD3=-1,string %1d% format %2d% ... OpcodeResult __stdcall opcode_0AD3(CRunningScript *thread) { - auto resultArg = GetStringParamWriteBuffer(thread); - - auto format = ReadStringParam(thread); - char text[MAX_STR_LEN]; ReadFormattedString(thread, text, sizeof(text), format); + auto resultArg = GetStringParamWriteBuffer(thread); OPCODE_VALIDATE_STR_ARG_WRITE(resultArg.first) + auto format = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(format) + char text[MAX_STR_LEN]; OPCODE_READ_FORMATTED_STRING(thread, text, sizeof(text), format) if (resultArg.first != nullptr && resultArg.second > 0) { @@ -2579,9 +2640,9 @@ namespace CLEO { //0AD4=-1,%3d% = scan_string %1d% format %2d% //IF and SET OpcodeResult __stdcall opcode_0AD4(CRunningScript *thread) { - char fmt[MAX_STR_LEN], *format, *src; - src = ReadStringParam(thread); - format = ReadStringParam(thread, fmt, sizeof(fmt)); + auto src = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(src) + char fmt[MAX_STR_LEN]; + auto format = ReadStringParam(thread, fmt, 
sizeof(fmt)); OPCODE_VALIDATE_STR_ARG_READ(format) size_t cExParams = 0; int *result = (int *)GetScriptParamPointer(thread); @@ -2652,14 +2713,16 @@ namespace CLEO { //0AD8=2,write_string_to_file %1d% from %2d% //IF and SET OpcodeResult __stdcall opcode_0AD8(CRunningScript *thread) { - DWORD hFile; - *thread >> hFile; + DWORD hFile; *thread >> hFile; + auto text = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(text) + if (FILE * file = convert_handle_to_file(hFile)) { - SetScriptCondResult(thread, fputs(ReadStringParam(thread), file) > 0); + SetScriptCondResult(thread, fputs(text, file) > 0); fflush(file); } - else { + else + { SetScriptCondResult(thread, false); } return OR_CONTINUE; @@ -2669,8 +2732,8 @@ namespace CLEO { OpcodeResult __stdcall opcode_0AD9(CRunningScript *thread) { DWORD hFile; *thread >> hFile; - auto format = ReadStringParam(thread); - char text[MAX_STR_LEN]; ReadFormattedString(thread, text, sizeof(text), format); + auto format = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(format) + char text[MAX_STR_LEN]; OPCODE_READ_FORMATTED_STRING(thread, text, sizeof(text), format) if (FILE * file = convert_handle_to_file(hFile)) { @@ -2684,10 +2747,9 @@ namespace CLEO { OpcodeResult __stdcall opcode_0ADA(CRunningScript *thread) { DWORD hFile; *thread >> hFile; - auto format = ReadStringParam(thread); + auto format = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(format) int *result = (int *)GetScriptParamPointer(thread); - size_t cExParams = 0; SCRIPT_VAR *ExParams[35]; // read extra params @@ -2733,7 +2795,8 @@ namespace CLEO { //0ADC=1, test_cheat %1d% OpcodeResult __stdcall opcode_0ADC(CRunningScript *thread) { - SetScriptCondResult(thread, TestCheat(ReadStringParam(thread))); + auto text = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(text) + SetScriptCondResult(thread, TestCheat(text)); return OR_CONTINUE; } @@ -2758,11 +2821,15 @@ namespace CLEO { //0ADE=2,%2d% = text_by_GXT_entry %1d% OpcodeResult __stdcall 
opcode_0ADE(CRunningScript *thread) { - const char *gxt = ReadStringParam(thread); + auto gxt = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(gxt) + if (*thread->GetBytePointer() >= 1 && *thread->GetBytePointer() <= 8) *thread << GetInstance().TextManager.Get(gxt); else - strcpy((char *)GetScriptParamPointer(thread), GetInstance().TextManager.Get(gxt)); + { + auto ok = WriteStringParam(thread, GetInstance().TextManager.Get(gxt)); OPCODE_VALIDATE_STR_ARG_WRITE(ok) + } + return OR_CONTINUE; } @@ -2770,9 +2837,8 @@ namespace CLEO { OpcodeResult __stdcall opcode_0ADF(CRunningScript *thread) { char gxtLabel[8]; // 7 + terminator character - ReadStringParam(thread, gxtLabel, sizeof(gxtLabel)); - - char *text = ReadStringParam(thread); + auto gxtOk = ReadStringParam(thread, gxtLabel, sizeof(gxtLabel)); OPCODE_VALIDATE_STR_ARG_READ(gxtOk) + auto text = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(text) GetInstance().TextManager.AddFxt(gxtLabel, text); return OR_CONTINUE; @@ -2781,7 +2847,9 @@ namespace CLEO { //0AE0=1,remove_dynamic_GXT_entry %1d% OpcodeResult __stdcall opcode_0AE0(CRunningScript *thread) { - GetInstance().TextManager.RemoveFxt(ReadStringParam(thread)); + auto gxt = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(gxt) + + GetInstance().TextManager.RemoveFxt(gxt); return OR_CONTINUE; } @@ -2949,15 +3017,11 @@ namespace CLEO { OpcodeResult __stdcall opcode_0AED(CRunningScript *thread) { // this opcode is useless now - float val; - char *format, *result; - *thread >> val; - format = ReadStringParam(thread); - if (*thread->GetBytePointer() >= 1 && *thread->GetBytePointer() <= 8) - *thread >> result; - else - result = &GetScriptParamPointer(thread)->cParam; - sprintf(result, format, val); + float val; *thread >> val; + auto format = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(format) + auto resultArg = GetStringParamWriteBuffer(thread); OPCODE_VALIDATE_STR_ARG_WRITE(resultArg.first) + + sprintf(resultArg.first, format, val); 
return OR_CONTINUE; } @@ -2989,9 +3053,9 @@ namespace CLEO { //2000=2,%2s% = resolve_filepath %1s% OpcodeResult __stdcall opcode_2000(CRunningScript* thread) { - auto path = CLEO_ReadStringOpcodeParam(thread); - CLEO_ResolvePath(thread, path, MAX_STR_LEN); - CLEO_WriteStringOpcodeParam(thread, path); + auto path = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(path) + auto resolved = reinterpret_cast(thread)->ResolvePath(path); + auto ok = WriteStringParam(thread, resolved.c_str()); OPCODE_VALIDATE_STR_ARG_WRITE(ok) return OR_CONTINUE; } @@ -3031,10 +3095,12 @@ namespace CLEO { path.append(script->GetScriptFileName()); path = script->ResolvePath(path.c_str()); // real absolute path - CLEO_WriteStringOpcodeParam(thread, path.c_str()); + auto ok = WriteStringParam(thread, path.c_str()); OPCODE_VALIDATE_STR_ARG_WRITE(ok) } else - CLEO_WriteStringOpcodeParam(thread, script->GetScriptFileName()); + { + auto ok = WriteStringParam(thread, script->GetScriptFileName()); OPCODE_VALIDATE_STR_ARG_WRITE(ok) + } SetScriptCondResult(thread, true); return OR_CONTINUE; @@ -3094,12 +3160,18 @@ extern "C" LPSTR WINAPI CLEO_ReadStringPointerOpcodeParam(CLEO::CRunningScript* thread, char *buf, int size) { - return ReadStringParam(thread, buf, size); + auto result = ReadStringParam(thread, buf, size); + + if (result == nullptr) + LOG_WARNING("%s in script %s", lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); + + return result; } void WINAPI CLEO_WriteStringOpcodeParam(CLEO::CRunningScript* thread, const char* str) { - WriteStringParam(thread, str); + if(!WriteStringParam(thread, str)) + LOG_WARNING("%s in script %s", lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); } char* WINAPI CLEO_ReadParamsFormatted(CLEO::CRunningScript* thread, const char* format, char* buf, int bufSize) @@ -3108,12 +3180,10 @@ extern "C" if (!buf) { buf = internal_buf; bufSize = sizeof(internal_buf); } if (!bufSize) bufSize = MAX_STR_LEN; - if(format != nullptr 
&& strlen(format) > 0) - ReadFormattedString(thread, buf, bufSize, format); - else + if(ReadFormattedString(thread, buf, bufSize, format) == -1) // error? { - SkipUnusedVarArgs(thread); - if(bufSize > 0) buf[0] = '\0'; + LOG_WARNING("%s in script %s", lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); + return nullptr; // error } return buf; diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index 6a0a023f..4835b993 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -19,6 +19,8 @@ namespace CLEO class CCustomOpcodeSystem : public VInjectible { public: + static const size_t MinValidAddress = 0x10000; // used for validation of pointers received from scripts. First 64kb are for sure reserved by Windows. + static const size_t LastOriginalOpcode = 0x0A4E; // GTA SA static const size_t LastCustomOpcode = 0x7FFF; @@ -27,6 +29,8 @@ namespace CLEO static WORD lastOpcode; static WORD* lastOpcodePtr; static WORD lastCustomOpcode; + static std::string lastErrorMsg; + static WORD prevOpcode; // previous void FinalizeScriptObjects(); @@ -34,7 +38,8 @@ namespace CLEO virtual void Inject(CCodeInjector& inj); ~CCustomOpcodeSystem() { - TRACE("Last opcode executed %04X", lastOpcode); + TRACE("Last opcode executed: %04X", lastOpcode); + TRACE("Previous opcode executed: %04X", prevOpcode); } static bool RegisterOpcode(WORD opcode, CustomOpcodeHandler callback); diff --git a/source/CDebug.cpp b/source/CDebug.cpp index 00c73f36..3dc8d445 100644 --- a/source/CDebug.cpp +++ b/source/CDebug.cpp @@ -1,10 +1,28 @@ #include "stdafx.h" #include "CDebug.h" #include "CleoBase.h" +#include CDebug Debug; using namespace CLEO; +std::string stringPrintf(const char* format, ...) 
+{ + va_list args; + + va_start(args, format); + auto len = std::vsnprintf(nullptr, 0, format, args) + 1; + va_end(args); + + std::string result(len, '\0'); + + va_start(args, format); + std::vsnprintf(result.data(), result.length(), format, args); + va_end(args); + + return result; +} + void CDebug::Trace(eLogLevel level, const char* format, ...) { va_list args; @@ -59,12 +77,25 @@ void CDebug::Error(const char* format, ...) auto msg = TraceVArg(eLogLevel::Error, format, args); va_end(args); + QUERY_USER_NOTIFICATION_STATE pquns; + SHQueryUserNotificationState(&pquns); + bool fullscreen = (pquns == QUNS_BUSY) || (pquns == QUNS_RUNNING_D3D_FULL_SCREEN) || (pquns == QUNS_PRESENTATION_MODE); + auto mainWnd = GetInstance().MainWnd; - PostMessage(mainWnd, WM_SYSCOMMAND, SC_MINIMIZE, 0); - ShowWindow(mainWnd, SW_MINIMIZE); + + if(fullscreen) + { + PostMessage(mainWnd, WM_SYSCOMMAND, SC_MINIMIZE, 0); + ShowWindow(mainWnd, SW_MINIMIZE); + } + MessageBox(mainWnd, msg, "CLEO error", MB_SYSTEMMODAL | MB_TOPMOST | MB_ICONERROR | MB_OK); - PostMessage(mainWnd, WM_SYSCOMMAND, SC_RESTORE, 0); - ShowWindow(mainWnd, SW_RESTORE); + + if (fullscreen) + { + PostMessage(mainWnd, WM_SYSCOMMAND, SC_RESTORE, 0); + ShowWindow(mainWnd, SW_RESTORE); + } } extern "C" diff --git a/source/CDebug.h b/source/CDebug.h index e2129132..afa9b045 100644 --- a/source/CDebug.h +++ b/source/CDebug.h @@ -7,6 +7,8 @@ const char szLogFileName[] = "cleo.log"; +std::string stringPrintf(const char* format, ...); + class CDebug { public: diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 8b10064a..985e1bff 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -1000,7 +1000,7 @@ namespace CLEO scriptsDir += "cleo";*/ std::string scriptsDir = "cleo"; // TODO: restore to absolute path when ModLoader is updated to support CLEO5 - TRACE("Searching for cleo scripts"); + TRACE("Searching for CLEO scripts"); CCustomScript* cs = nullptr; FilesWalk(scriptsDir.c_str(), cs_ext, 
[&](const char* fullPath, const char* filename) { @@ -1027,6 +1027,8 @@ namespace CLEO typedef void WINAPI callback(void); ((callback*)func)(); } + + TRACE("Scripts search done."); } CCustomScript * CScriptEngine::LoadScript(const char * szFilePath) diff --git a/source/CleoBase.cpp b/source/CleoBase.cpp index fe8f03ad..244f9107 100644 --- a/source/CleoBase.cpp +++ b/source/CleoBase.cpp @@ -50,6 +50,7 @@ namespace CLEO CodeInjector.ReplaceFunction(OnDrawingFinished, 0x00734640); // nullsub_63 - originally something like renderDebugStuff? m_bStarted = true; + TRACE("CLEO instance started successfully!"); } void CCleoInstance::Stop() From f6e1f6dae806b4306d2d43db39948b8c09aad2bf Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Mon, 30 Oct 2023 04:13:30 +0100 Subject: [PATCH 050/216] Fix string view of reallocated string. (#14) --- source/FileEnumerator.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/FileEnumerator.h b/source/FileEnumerator.h index e54188d9..a73852d3 100644 --- a/source/FileEnumerator.h +++ b/source/FileEnumerator.h @@ -36,7 +36,7 @@ void FilesWalk(const char* directory, const char* extension, T callback) std::string pattern = directory; if(!pattern.empty() && pattern.back() != '\\') pattern.push_back('\\'); - std::string_view baseDir = pattern; + const size_t baseDirLen = pattern.length(); pattern.push_back('*'); if (extension != nullptr) pattern.append(extension); @@ -56,7 +56,7 @@ void FilesWalk(const char* directory, const char* extension, T callback) continue; // skip directories } - auto result = std::filesystem::weakly_canonical(std::string(baseDir) + wfd.cFileName); // will use CWD if input path was relative! + auto result = std::filesystem::weakly_canonical(pattern.substr(0, baseDirLen) + wfd.cFileName); // will use CWD if input path was relative! 
callback(result.string().c_str(), result.filename().string().c_str()); } while (FindNextFile(hSearch, &wfd)); From 93757b02cae032ac644d860aa28766983c3810da Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Mon, 30 Oct 2023 04:25:48 +0100 Subject: [PATCH 051/216] Http links of debug opcodes fixed. (#15) --- CHANGELOG.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5398a22c..383e2026 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,11 +2,11 @@ - support for CLEO modules feature https://github.com/sannybuilder/dev/issues/264 - new [DebugUtils](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/DebugUtils) plugin - - new opcode **00C3 ([debug_on](https://library.sannybuilder.com/#/sa/CLEO/00C3))** - - new opcode **00C4 ([debug_off](https://library.sannybuilder.com/#/sa/CLEO/00C4))** - - new opcode **00CC ([breakpoint](https://library.sannybuilder.com/#/sa/CLEO/00CC))** - - new opcode **00CD ([trace](https://library.sannybuilder.com/#/sa/CLEO/00CD))** - - new opcode **00CE ([log_to_file](https://library.sannybuilder.com/#/sa/CLEO/00CE))** + - new opcode **00C3 ([debug_on](https://library.sannybuilder.com/#/sa/debug/00C3))** + - new opcode **00C4 ([debug_off](https://library.sannybuilder.com/#/sa/debug/00C4))** + - new opcode **00CC ([breakpoint](https://library.sannybuilder.com/#/sa/debug/00CC))** + - new opcode **00CD ([trace](https://library.sannybuilder.com/#/sa/debug/00CD))** + - new opcode **00CE ([log_to_file](https://library.sannybuilder.com/#/sa/debug/00CE))** - implemented support of opcodes **0662**, **0663** and **0664** (original Rockstar's script debugging opcodes. 
See DebugUtils.ini) - new and updated opcodes - **0DD5 ([get_game_platform](https://library.sannybuilder.com/#/sa/CLEO/0DD5))** From a3c69241c2ccea3bf8347d8f22fc21ab4ed288b8 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Mon, 30 Oct 2023 05:26:43 +0100 Subject: [PATCH 052/216] support UAL's hack of find file APIs (#16) --- source/FileEnumerator.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/source/FileEnumerator.h b/source/FileEnumerator.h index a73852d3..a45cfc16 100644 --- a/source/FileEnumerator.h +++ b/source/FileEnumerator.h @@ -56,7 +56,13 @@ void FilesWalk(const char* directory, const char* extension, T callback) continue; // skip directories } - auto result = std::filesystem::weakly_canonical(pattern.substr(0, baseDirLen) + wfd.cFileName); // will use CWD if input path was relative! + std::string path; + if (std::filesystem::path(wfd.cFileName).is_absolute()) + path = wfd.cFileName; // somebody hacked findFirstFile APIs and is providing us absolute path + else + path = pattern.substr(0, baseDirLen) + wfd.cFileName; // standard + + auto result = std::filesystem::weakly_canonical(path); // will use CWD if input path was relative! callback(result.string().c_str(), result.filename().string().c_str()); } while (FindNextFile(hSearch, &wfd)); From 1d34952e904d09dce8d9831fd51c0a52dfe5894c Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Mon, 30 Oct 2023 05:52:23 +0100 Subject: [PATCH 053/216] updated virtual paths info (#17) --- CHANGELOG.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 383e2026..764ad1a3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,11 +17,11 @@ - opcodes **0AAB**, **0AE4**, **0AE5**, **0AE6**, **0AE7** and **0AE8** moved to the [FileSystemOperations](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/FileSystemOperations) plugin - changes in file operations - file paths can now use 'virtual absolute paths'. 
Use prefix in file path strings to access predefined locations: - - `0:\` for _game root_ directory - - `1:\` for _game save files_ directory - - `2:\` for _this script file_ directory - - `3:\` for _CLEO_ directory - - `4:\` for _CLEO\cleo_modules_ directory + - `root:\` for _game root_ directory + - `userfiles:\` for _game save files_ directory + - `.\` for _this script file_ directory + - `cleo:\` for _CLEO_ directory + - `modules:\` for _CLEO\cleo_modules_ directory - rewritten opcode **0A99 (set_current_directory)**. It no longer affects internal game state and other scripts - improved error handling - more detailed error messages in some scenarios From f43116fba57206fed2d0b97359e9de86071b3422 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Wed, 1 Nov 2023 02:36:34 +0100 Subject: [PATCH 054/216] debug opcodes renumbered to unoccupied ids (#20) --- CHANGELOG.md | 6 +++--- cleo_plugins/DebugUtils/DebugUtils.cpp | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 764ad1a3..7466c1ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,9 +4,9 @@ - new [DebugUtils](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/DebugUtils) plugin - new opcode **00C3 ([debug_on](https://library.sannybuilder.com/#/sa/debug/00C3))** - new opcode **00C4 ([debug_off](https://library.sannybuilder.com/#/sa/debug/00C4))** - - new opcode **00CC ([breakpoint](https://library.sannybuilder.com/#/sa/debug/00CC))** - - new opcode **00CD ([trace](https://library.sannybuilder.com/#/sa/debug/00CD))** - - new opcode **00CE ([log_to_file](https://library.sannybuilder.com/#/sa/debug/00CE))** + - new opcode **2100 ([breakpoint](https://library.sannybuilder.com/#/sa/debug/2100))** + - new opcode **2101 ([trace](https://library.sannybuilder.com/#/sa/debug/2101))** + - new opcode **2102 ([log_to_file](https://library.sannybuilder.com/#/sa/debug/2102))** - implemented support of opcodes **0662**, **0663** and **0664** (original 
Rockstar's script debugging opcodes. See DebugUtils.ini) - new and updated opcodes - **0DD5 ([get_game_platform](https://library.sannybuilder.com/#/sa/CLEO/0DD5))** diff --git a/cleo_plugins/DebugUtils/DebugUtils.cpp b/cleo_plugins/DebugUtils/DebugUtils.cpp index 25cc482f..db83becf 100644 --- a/cleo_plugins/DebugUtils/DebugUtils.cpp +++ b/cleo_plugins/DebugUtils/DebugUtils.cpp @@ -40,9 +40,9 @@ class DebugUtils // register opcodes CLEO_RegisterOpcode(0x00C3, Opcode_DebugOn); CLEO_RegisterOpcode(0x00C4, Opcode_DebugOff); - CLEO_RegisterOpcode(0x00CC, Opcode_Breakpoint); - CLEO_RegisterOpcode(0x00CD, Opcode_Trace); - CLEO_RegisterOpcode(0x00CE, Opcode_LogToFile); + CLEO_RegisterOpcode(0x2100, Opcode_Breakpoint); + CLEO_RegisterOpcode(0x2101, Opcode_Trace); + CLEO_RegisterOpcode(0x2102, Opcode_LogToFile); // original Rockstar's script debugging opcodes if(GetPrivateProfileInt("General", "LegacyDebugOpcodes", 0, config.c_str()) != 0) @@ -185,7 +185,7 @@ class DebugUtils return OR_CONTINUE; } - // 00CC=-1, breakpoint ... + // 2100=-1, breakpoint ... static OpcodeResult WINAPI Opcode_Breakpoint(CScriptThread* thread) { if (!CLEO_GetScriptDebugMode(thread)) @@ -232,7 +232,7 @@ class DebugUtils return OR_INTERRUPT; } - // 00CD=-1, trace %1s% ... + // 2101=-1, trace %1s% ... static OpcodeResult WINAPI Opcode_Trace(CScriptThread* thread) { if (!CLEO_GetScriptDebugMode(thread)) @@ -248,7 +248,7 @@ class DebugUtils return OR_CONTINUE; } - // 00CE=-1, log_to_file %1s% timestamp %2d% text %3s% ... + // 2102=-1, log_to_file %1s% timestamp %2d% text %3s% ... 
static OpcodeResult WINAPI Opcode_LogToFile(CScriptThread* thread) { auto filestr = CLEO_ReadStringOpcodeParam(thread); From 2729a3cf09c4baf478fd0b37b061ae53f5ea7e2a Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Wed, 1 Nov 2023 02:54:27 +0100 Subject: [PATCH 055/216] Check for multiple CLEO.asi instances loaded (#19) * Checking for duplicated CLEO.asi * Checking for duplicated CLEO.asi --- CLEO5.vcxproj | 1 + source/Singleton.h | 38 ++++++++++++++++++++++++++++++++++++++ source/dllmain.cpp | 1 + 3 files changed, 40 insertions(+) create mode 100644 source/Singleton.h diff --git a/CLEO5.vcxproj b/CLEO5.vcxproj index 7ede7b03..c82564ea 100644 --- a/CLEO5.vcxproj +++ b/CLEO5.vcxproj @@ -77,6 +77,7 @@ + diff --git a/source/Singleton.h b/source/Singleton.h new file mode 100644 index 00000000..76f9f34f --- /dev/null +++ b/source/Singleton.h @@ -0,0 +1,38 @@ +#pragma once +#include +#include + +class _CleoSingleton +{ +public: + _CleoSingleton() + { + MODULEENTRY32 module; + module.dwSize = sizeof(MODULEENTRY32); + HANDLE snapshot = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE, GetCurrentProcessId()); + + Module32First(snapshot, &module); + if (snapshot != INVALID_HANDLE_VALUE) + { + size_t count = 0; + do + { + if (_strcmpi(module.szModule, "CLEO.asi") == 0) + { + count++; + + if(count > 1) + { + CloseHandle(snapshot); + MessageBox(NULL, "Another copy of CLEO.asi is already loaded!\nPlease remove duplicated files.", "CLEO error", MB_SYSTEMMODAL | MB_TOPMOST | MB_ICONERROR | MB_OK); + exit(1); + break; + } + } + } while (Module32Next(snapshot, &module)); + + CloseHandle(snapshot); + } + } +} CleoSingleton; + diff --git a/source/dllmain.cpp b/source/dllmain.cpp index abdbd40b..8409f042 100644 --- a/source/dllmain.cpp +++ b/source/dllmain.cpp @@ -1,4 +1,5 @@ #include "stdafx.h" +#include "Singleton.h" #include "CleoBase.h" #include "CDebug.h" From 7c6e21ba8e42ea26b07f76cf54045dc700f55005 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Wed, 1 Nov 2023 02:57:07 +0100 Subject: 
[PATCH 056/216] Cleo return arguments count check (#18) --- cleo_sdk/CLEO.h | 2 +- source/CCustomOpcodeSystem.cpp | 75 +++++++++++++++++++++++++++++----- 2 files changed, 65 insertions(+), 12 deletions(-) diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index eef1eb44..28ab1a41 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -370,7 +370,7 @@ void WINAPI CLEO_SetThreadCondResult(CRunningScript* thread, BOOL result); void WINAPI CLEO_ThreadJumpAtLabelPtr(CRunningScript* thread, int labelPtr); eDataType WINAPI CLEO_GetOperandType(const CRunningScript* thread); // peek parameter data type -DWORD WINAPI CLEO_GetVarArgCount(CRunningScript* thread); // peek var-args count +DWORD WINAPI CLEO_GetVarArgCount(CRunningScript* thread); // peek remaining var-args count extern SCRIPT_VAR* opcodeParams; extern SCRIPT_VAR* missionLocals; diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 2672c1cb..e5348138 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -2075,7 +2075,7 @@ namespace CLEO int label = 0; char* moduleTxt = nullptr; - auto paramType = CLEO_GetOperandType(thread); + auto paramType = (eDataType)*thread->GetBytePointer(); switch (paramType) { // label of current script @@ -2104,7 +2104,7 @@ namespace CLEO break; default: - SHOW_ERROR("Invalid type (%s) of the first argument in opcode [0AB1] in script %s \nScript suspended.", ToKindStr(paramType), ((CCustomScript*)thread)->GetInfoStr().c_str()); + SHOW_ERROR("Invalid type (%s) of the 'input param count' argument in opcode [0AB1] in script %s \nScript suspended.", ToKindStr(paramType), ((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } @@ -2142,13 +2142,41 @@ namespace CLEO label = scriptRef.offset; } - DWORD nParams = 0; - if(*thread->GetBytePointer()) *thread >> nParams; - if(nParams > 32) + // "number of input parameters" opcode argument + DWORD nParams; + paramType = 
(eDataType)*thread->GetBytePointer(); + switch (paramType) { - SHOW_ERROR("Argument count (%d), out of supported range (32) of opcode [0AB1] in script %s", nParams, ((CCustomScript*)thread)->GetInfoStr().c_str()); + case DT_END: + nParams = 0; + break; - return CCustomOpcodeSystem::ErrorSuspendScript(thread); + // literal integers + case DT_BYTE: + case DT_WORD: + case DT_DWORD: + *thread >> nParams; + break; + + default: + SHOW_ERROR("Invalid type of first argument in opcode [0AB1], in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } + if (nParams) + { + auto nVarArg = GetVarArgCount(thread); + if (nParams > nVarArg) // if less it means there are return params too + { + SHOW_ERROR("Opcode [0AB1] declared %d input args, but provided %d in script %s\nScript suspended.", nParams, nVarArg, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } + + if (nParams > 32) + { + SHOW_ERROR("Argument count %d is out of supported range (32) of opcode [0AB1] in script %s", nParams, ((CCustomScript*)thread)->GetInfoStr().c_str()); + + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } } static SCRIPT_VAR arguments[32]; @@ -2228,8 +2256,32 @@ namespace CLEO { ScmFunction *scmFunc = ScmFunction::Store[reinterpret_cast(thread)->GetScmFunction()]; - DWORD returnParamCount = 0; - if (*thread->GetBytePointer()) *thread >> returnParamCount; + DWORD returnParamCount = GetVarArgCount(thread); + if (returnParamCount) + { + DWORD declaredParamCount; + + auto paramType = (eDataType)*thread->GetBytePointer(); + switch (paramType) + { + // literal integers + case DT_BYTE: + case DT_WORD: + case DT_DWORD: + *thread >> declaredParamCount; + break; + + default: + SHOW_ERROR("Invalid type of first argument in opcode [0AB2], in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } + + 
if(returnParamCount - 1 != declaredParamCount) // minus 'num args' itself + { + SHOW_ERROR("Opcode [0AB2] declared %d return args, but provided %d in script %s\nScript suspended.", declaredParamCount, returnParamCount - 1, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } + } if (returnParamCount) GetScriptParams(thread, returnParamCount); scmFunc->Return(thread); // jump back to cleo_call, right after last input param. Return slot var args starts here @@ -2237,9 +2289,10 @@ namespace CLEO delete scmFunc; DWORD returnSlotCount = GetVarArgCount(thread); - if (returnSlotCount > returnParamCount) + if(returnParamCount) returnParamCount--; // do not count the 'num args' argument itself + if (returnSlotCount != returnParamCount) { - SHOW_ERROR("Opcode [0AB2] returned fewer params than expected by function caller in script %s\nScript suspended.", ((CCustomScript*)thread)->GetInfoStr().c_str()); + SHOW_ERROR("Opcode [0AB2] returned %d params, while function caller expected %d in script %s\nScript suspended.", returnParamCount, returnSlotCount, ((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } From c4fa849f3ce86f70f5b69c858128bca21b40e137 Mon Sep 17 00:00:00 2001 From: Miran Date: Thu, 2 Nov 2023 04:54:50 +0100 Subject: [PATCH 057/216] custom opcodes handler fix --- source/CCustomOpcodeSystem.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index e5348138..a41a7bea 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -6,7 +6,6 @@ #include "CTextManager.h" #include "CModelInfo.h" -#include #include #define OPCODE_VALIDATE_STR_ARG_READ(x) if((void*)x == nullptr) { SHOW_ERROR("%s in script %s \nScript suspended.", lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } @@ 
-397,6 +396,7 @@ namespace CLEO customOpcodeHandlers[i] = (_OpcodeHandler)customOpcodeHandler; } MemWrite(gvm.TranslateMemoryAddress(MA_OPCODE_HANDLER_REF), &customOpcodeHandlers); + MemWrite(0x00469EF0, &customOpcodeHandlers); // TODO: game version translation FUNC_fopen = gvm.TranslateMemoryAddress(MA_FOPEN_FUNCTION); FUNC_fclose = gvm.TranslateMemoryAddress(MA_FCLOSE_FUNCTION); From f606c18e2d06ab897f87ca92ac6ea6ef8ecc645a Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Thu, 2 Nov 2023 05:20:38 +0100 Subject: [PATCH 058/216] Fixes (#23) * Compilation warnings fixes * fixed global debug mode state not initialized in spawned scripts --- source/CCustomOpcodeSystem.cpp | 11 ++++------- source/CScriptEngine.cpp | 34 ++++++++++++++++++++-------------- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index a41a7bea..e9449bb5 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -764,9 +764,6 @@ namespace CLEO std::pair GetStringParamWriteBuffer(CRunningScript* thread) { - char* targetBuff; - DWORD targetSize; - lastErrorMsg.clear(); auto paramType = CLEO_GetOperandType(thread); @@ -1278,7 +1275,7 @@ namespace CLEO cs->LogicalOp = eLogicalOperation::NONE; cs->NotFlag = false; - cs->SetScmFunction(thisScmFunctionId = allocationPlace); + cs->SetScmFunction(thisScmFunctionId = (unsigned short)allocationPlace); } void Return(CRunningScript *thread) @@ -1348,13 +1345,13 @@ namespace CLEO switch (size) { default: - GetInstance().CodeInjector.MemoryWrite(Address, value, vp, size); + GetInstance().CodeInjector.MemoryWrite(Address, (BYTE)value, vp, size); break; case 2: - GetInstance().CodeInjector.MemoryWrite(Address, value, vp); + GetInstance().CodeInjector.MemoryWrite(Address, (WORD)value, vp); break; case 4: - GetInstance().CodeInjector.MemoryWrite(Address, value, vp); + GetInstance().CodeInjector.MemoryWrite(Address, (DWORD)value, vp); break; } return OR_CONTINUE; diff 
--git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 985e1bff..cfbe5323 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -1002,25 +1002,31 @@ namespace CLEO TRACE("Searching for CLEO scripts"); - CCustomScript* cs = nullptr; - FilesWalk(scriptsDir.c_str(), cs_ext, [&](const char* fullPath, const char* filename) { - cs = LoadScript(fullPath); - }); - - FilesWalk(scriptsDir.c_str(), cs4_ext, [&](const char* fullPath, const char* filename) { - cs = LoadScript(fullPath); - if (cs) cs->SetCompatibility(CLEO_VER_4); + FilesWalk(scriptsDir.c_str(), cs_ext, [&](const char* fullPath, const char* filename) + { + if(auto cs = LoadScript(fullPath)) + { + cs->SetDebugMode(NativeScriptsDebugMode); // inherit from global state + } }); - FilesWalk(scriptsDir.c_str(), cs3_ext, [&](const char* fullPath, const char* filename) { - cs = LoadScript(fullPath); - if (cs) cs->SetCompatibility(CLEO_VER_3); + FilesWalk(scriptsDir.c_str(), cs4_ext, [&](const char* fullPath, const char* filename) + { + if (auto cs = LoadScript(fullPath)) + { + cs->SetCompatibility(CLEO_VER_4); + cs->SetDebugMode(NativeScriptsDebugMode); // inherit from global state + } }); - if (cs != nullptr) + FilesWalk(scriptsDir.c_str(), cs3_ext, [&](const char* fullPath, const char* filename) { - cs->SetDebugMode(NativeScriptsDebugMode); // inherit from global state - } + if (auto cs = LoadScript(fullPath)) + { + cs->SetCompatibility(CLEO_VER_3); + cs->SetDebugMode(NativeScriptsDebugMode); // inherit from global state + } + }); for (void* func : GetInstance().GetCallbacks(eCallbackId::ScriptsLoaded)) { From c48d69e3b5b36bd9d47c3563631b55525dc6c3a3 Mon Sep 17 00:00:00 2001 From: Miran Date: Fri, 3 Nov 2023 22:54:14 +0100 Subject: [PATCH 059/216] CLEO paths handling updates. 
Detection of multiple CLEO.asi loaded --- CLEO5.vcxproj.filters | 5 ++- source/CCustomOpcodeSystem.cpp | 4 +-- source/CDebug.h | 4 +-- source/CModuleSystem.cpp | 14 +++----- source/CModuleSystem.h | 3 +- source/CPluginSystem.h | 7 ++-- source/CScriptEngine.cpp | 65 ++++++++++++++++------------------ source/CSoundSystem.cpp | 3 ++ source/CTextManager.cpp | 3 +- source/CleoBase.cpp | 17 +++++---- source/CleoBase.h | 2 -- source/FileEnumerator.h | 8 ++--- source/Singleton.h | 14 +++++--- source/dllmain.cpp | 1 - source/stdafx.h | 20 +++++++++++ 15 files changed, 96 insertions(+), 74 deletions(-) diff --git a/CLEO5.vcxproj.filters b/CLEO5.vcxproj.filters index 8b149990..dc072f49 100644 --- a/CLEO5.vcxproj.filters +++ b/CLEO5.vcxproj.filters @@ -56,7 +56,7 @@ plugin_sdk - + source\utils @@ -158,6 +158,9 @@ source + + source\utils + diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index e9449bb5..52361462 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -2133,8 +2133,8 @@ namespace CLEO } scmFunc->moduleExportRef = scriptRef.base; // to be released on return - reinterpret_cast(thread)->SetScriptFileDir(std::filesystem::path(modulePath).parent_path().string().c_str()); - reinterpret_cast(thread)->SetScriptFileName(std::filesystem::path(modulePath).filename().string().c_str()); + reinterpret_cast(thread)->SetScriptFileDir(FS::path(modulePath).parent_path().string().c_str()); + reinterpret_cast(thread)->SetScriptFileName(FS::path(modulePath).filename().string().c_str()); thread->SetBaseIp(scriptRef.base); label = scriptRef.offset; } diff --git a/source/CDebug.h b/source/CDebug.h index afa9b045..517850b0 100644 --- a/source/CDebug.h +++ b/source/CDebug.h @@ -5,14 +5,12 @@ #define LOG_WARNING(a,...) {Debug.Trace(CLEO::eLogLevel::Error, a, __VA_ARGS__);} #define SHOW_ERROR(a,...) 
{Debug.Error(a, __VA_ARGS__);} -const char szLogFileName[] = "cleo.log"; - std::string stringPrintf(const char* format, ...); class CDebug { public: - CDebug() : m_hFile(szLogFileName) + CDebug() : m_hFile(Filepath_Log) { Trace(CLEO::eLogLevel::Default, "Log started."); diff --git a/source/CModuleSystem.cpp b/source/CModuleSystem.cpp index 98e0054d..9bb6267d 100644 --- a/source/CModuleSystem.cpp +++ b/source/CModuleSystem.cpp @@ -1,11 +1,9 @@ #include "stdafx.h" #include "CleoBase.h" #include "CModuleSystem.h" -#include "CFileMgr.h" #include "FileEnumerator.h" #include -#include #include using namespace CLEO; @@ -70,9 +68,7 @@ bool CModuleSystem::LoadDirectory(const char* path) bool CModuleSystem::LoadCleoModules() { - std::string path = CFileMgr::ms_rootDirName; - if (!path.empty() && path.back() != '\\') path.push_back('\\'); - path += "cleo\\cleo_modules"; + const auto path = FS::path(Filepath_Cleo).append("cleo_modules").string(); return LoadDirectory(path.c_str()); } @@ -123,10 +119,10 @@ void CModuleSystem::CModule::Update() { if (!updateNeeded) { - std::filesystem::file_time_type time; + FS::file_time_type time; try { - time = std::filesystem::last_write_time(filepath); + time = FS::last_write_time(filepath); } catch (...) { @@ -134,7 +130,7 @@ void CModuleSystem::CModule::Update() } // file not exists or up to date - if (time == std::filesystem::file_time_type{} || time == fileTime) + if (time == FS::file_time_type{} || time == fileTime) { // query files once a second for(size_t i = 0; i < 100 && updateActive; i++) @@ -201,7 +197,7 @@ bool CModuleSystem::CModule::LoadFromFile(const char* path) try { - fileTime = std::filesystem::last_write_time(path); + fileTime = FS::last_write_time(path); } catch(...) 
{ diff --git a/source/CModuleSystem.h b/source/CModuleSystem.h index f385a4d4..f6cd2437 100644 --- a/source/CModuleSystem.h +++ b/source/CModuleSystem.h @@ -1,6 +1,5 @@ #pragma once #include -#include #include #include #include @@ -58,7 +57,7 @@ namespace CLEO // hot reloading when source file modified std::atomic refCount = 0; - std::filesystem::file_time_type fileTime; // last write time of source file + FS::file_time_type fileTime; // last write time of source file void Update(); std::atomic updateActive = true; std::atomic updateNeeded = false; diff --git a/source/CPluginSystem.h b/source/CPluginSystem.h index 11ffed3b..eb183bbf 100644 --- a/source/CPluginSystem.h +++ b/source/CPluginSystem.h @@ -20,7 +20,8 @@ namespace CLEO TRACE("Loading plugins..."); - FilesWalk("cleo\\cleo_plugins", ".cleo", [&](const char* fullPath, const char* filename) + auto path = FS::path(Filepath_Cleo).append("cleo_plugins").string(); + FilesWalk(path.c_str(), ".cleo", [&](const char* fullPath, const char* filename) { std::string name = filename; std::transform(name.begin(), name.end(), name.begin(), [](unsigned char c) { return std::tolower(c); }); @@ -46,7 +47,7 @@ namespace CLEO }); // load plugins from legacy location - FilesWalk("cleo", ".cleo", [&](const char* fullPath, const char* filename) + FilesWalk(Filepath_Cleo.c_str(), ".cleo", [&](const char* fullPath, const char* filename) { std::string name = filename; std::transform(name.begin(), name.end(), name.begin(), [](unsigned char c) { return std::tolower(c); }); @@ -57,7 +58,7 @@ namespace CLEO HMODULE hlib = LoadLibrary(fullPath); if (!hlib) { - LOG_WARNING("Error loading plugin '%s'", fullPath); + LOG_WARNING("Error while loading plugin '%s'", fullPath); } else { diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index cfbe5323..69ebf4c2 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -3,7 +3,6 @@ #include "CFileMgr.h" #include "CGame.h" -#include #include namespace CLEO @@ -652,10 
+651,15 @@ namespace CLEO void CCustomScript::SetWorkDir(const char* directory) { + if(directory == nullptr || strlen(directory) == 0) + return; // Already done. Empty path is relative path starting at current work dir + + auto resolved = ResolvePath(directory); + if (!bIsCustom) - GetInstance().ScriptEngine.MainScriptCurWorkDir = directory; + GetInstance().ScriptEngine.MainScriptCurWorkDir = resolved; else - workDir = directory; + workDir = resolved; } std::string CCustomScript::ResolvePath(const char* path, const char* customWorkDir) const @@ -667,7 +671,7 @@ namespace CLEO try { - auto fsPath = std::filesystem::path(path); + auto fsPath = FS::path(path); // check for virtual path root enum class VPref{ None, Game, User, Script, Cleo, Modules } virtualPrefix = VPref::None; @@ -686,15 +690,14 @@ namespace CLEO { if(fsPath.is_relative()) { - auto workDir = ResolvePath(GetWorkDir()); - fsPath = workDir / fsPath; + fsPath = GetWorkDir() / fsPath; } - return std::filesystem::weakly_canonical(fsPath).string(); + return FS::weakly_canonical(fsPath).string(); } // expand virtual paths - std::filesystem::path resolved; + FS::path resolved; if (virtualPrefix == VPref::User) // user files location { @@ -703,12 +706,12 @@ namespace CLEO else if (virtualPrefix == VPref::Script) // this script's source file location { - resolved = ResolvePath(GetScriptFileDir()); + resolved = GetScriptFileDir(); } else { // all remaing variants starts with game root - resolved = std::filesystem::path(CFileMgr::ms_rootDirName); + resolved = Filepath_Root; switch(virtualPrefix) { @@ -721,7 +724,7 @@ namespace CLEO for(auto it = ++fsPath.begin(); it != fsPath.end(); it++) resolved /= *it; - return std::filesystem::weakly_canonical(resolved).string(); // collapse "..\" uses + return FS::weakly_canonical(resolved).string(); // collapse "..\" uses } catch (const std::exception& ex) { @@ -924,29 +927,26 @@ namespace CLEO { if (CGame::bMissionPackGame == 0) // regular main game { - MainScriptFileDir = 
std::string(DIR_GAME) + "\\data\\script"; + MainScriptFileDir = FS::path(Filepath_Cleo).append("data\\script").string(); MainScriptFileName = "main.scm"; } else // mission pack { - MainScriptFileDir = std::string(DIR_USER) + "\\MPACK\\MPACK"; - MainScriptFileDir += std::to_string(CGame::bMissionPackGame); + MainScriptFileDir = FS::path(GetUserDirectory()).append(stringPrintf("MPACK\\MPACK%d", CGame::bMissionPackGame)).string(); MainScriptFileName = "scr.scm"; } - NativeScriptsDebugMode = GetPrivateProfileInt("General", "DebugMode", 0, GetInstance().ConfigFilename.c_str()) != 0; - MainScriptCurWorkDir = DIR_GAME; + NativeScriptsDebugMode = GetPrivateProfileInt("General", "DebugMode", 0, Filepath_Config.c_str()) != 0; + MainScriptCurWorkDir = Filepath_Root; } void CScriptEngine::LoadCustomScripts(bool load_mode) { - char safe_name[MAX_PATH]; - // steam offset is different, so get it manually for now CGameVersionManager& gvm = GetInstance().VersionManager; int nSlot = gvm.GetGameVersion() != GV_STEAM ? 
*(BYTE*)&MenuManager->m_nSelectedSaveGame : *((BYTE*)MenuManager + 0x15B); - sprintf(safe_name, "./cleo/cleo_saves/cs%d.sav", nSlot); + auto saveFile = FS::path(Filepath_Cleo).append(stringPrintf("cleo_saves\\cs%d.sav", nSlot)).string(); safe_info = nullptr; stopped_info = nullptr; @@ -957,8 +957,8 @@ namespace CLEO // load cleo saving file try { - TRACE("Loading cleo safe %s", safe_name); - std::ifstream ss(safe_name, std::ios::binary); + TRACE("Loading cleo safe %s", saveFile.c_str()); + std::ifstream ss(saveFile.c_str(), std::ios::binary); if (ss.is_open()) { ss.exceptions(std::ios::eofbit | std::ios::badbit | std::ios::failbit); @@ -984,7 +984,7 @@ namespace CLEO } catch (std::exception& ex) { - TRACE("Loading of cleo safe %s failed: %s", safe_name, ex.what()); + TRACE("Loading of cleo safe %s failed: %s", saveFile.c_str(), ex.what()); safe_header.n_saved_threads = safe_header.n_stopped_threads = 0; memset(CleoVariables, 0, sizeof(CleoVariables)); } @@ -994,23 +994,18 @@ namespace CLEO memset(CleoVariables, 0, sizeof(CleoVariables)); } - // [game root]\cleo - /*std::string scriptsDir = CFileMgr::ms_rootDirName; - if (!scriptsDir.empty() && scriptsDir.back() != '\\') scriptsDir.push_back('\\'); - scriptsDir += "cleo";*/ - std::string scriptsDir = "cleo"; // TODO: restore to absolute path when ModLoader is updated to support CLEO5 - TRACE("Searching for CLEO scripts"); + std::string scriptsDir = "cleo"; // TODO: use Filepath_Cleo instead ModLoader is updated to support CLEO5 FilesWalk(scriptsDir.c_str(), cs_ext, [&](const char* fullPath, const char* filename) { - if(auto cs = LoadScript(fullPath)) + if (auto cs = LoadScript(fullPath)) { cs->SetDebugMode(NativeScriptsDebugMode); // inherit from global state } }); - FilesWalk(scriptsDir.c_str(), cs4_ext, [&](const char* fullPath, const char* filename) + FilesWalk(scriptsDir.c_str(), cs4_ext, [&](const char* fullPath, const char* filename) { if (auto cs = LoadScript(fullPath)) { @@ -1019,7 +1014,7 @@ namespace CLEO 
} }); - FilesWalk(scriptsDir.c_str(), cs3_ext, [&](const char* fullPath, const char* filename) + FilesWalk(scriptsDir.c_str(), cs3_ext, [&](const char* fullPath, const char* filename) { if (auto cs = LoadScript(fullPath)) { @@ -1034,7 +1029,7 @@ namespace CLEO ((callback*)func)(); } - TRACE("Scripts search done."); + TRACE("Scripts search done"); } CCustomScript * CScriptEngine::LoadScript(const char * szFilePath) @@ -1318,12 +1313,12 @@ namespace CLEO TRACE("Loading custom script %s...", szFileName); // store script file directory and name - std::filesystem::path path = szFileName; - path = std::filesystem::weakly_canonical(path); + FS::path path = szFileName; + path = FS::weakly_canonical(path); scriptFileDir = path.parent_path().string(); scriptFileName = path.filename().string(); - workDir = DIR_GAME; + workDir = Filepath_Root; try { diff --git a/source/CSoundSystem.cpp b/source/CSoundSystem.cpp index 4a9e8677..e54501bf 100644 --- a/source/CSoundSystem.cpp +++ b/source/CSoundSystem.cpp @@ -3,6 +3,7 @@ #include "bass.h" #include "CDebug.h" #include "CleoBase.h" +#include "Singleton.h" #include namespace CLEO @@ -28,6 +29,8 @@ namespace CLEO LRESULT __stdcall HOOK_DefWindowProc(HWND wnd, UINT msg, WPARAM wparam, LPARAM lparam) { + CleoSingletonCheck(); // check once for CLEO.asi duplicates + if (GetInstance().SoundSystem.Initialized()) { // pause streams if the window loses focus, or if SA found any other reason to pause diff --git a/source/CTextManager.cpp b/source/CTextManager.cpp index b0fd9002..9030e5dc 100644 --- a/source/CTextManager.cpp +++ b/source/CTextManager.cpp @@ -104,7 +104,8 @@ namespace CLEO CTextManager::CTextManager() : fxts(1, crc32FromUpcaseStdString) { // parse FXT files - FilesWalk("cleo\\cleo_text", ".fxt", [this](const char* fullPath, const char* filename) + auto path = FS::path(Filepath_Cleo).append("cleo_text").string(); + FilesWalk(path.c_str(), ".fxt", [this](const char* fullPath, const char* filename) { TRACE("Parsing FXT file %s", 
fullPath); try diff --git a/source/CleoBase.cpp b/source/CleoBase.cpp index 244f9107..2ef3bb71 100644 --- a/source/CleoBase.cpp +++ b/source/CleoBase.cpp @@ -1,6 +1,5 @@ #include "stdafx.h" #include "CleoBase.h" -#include namespace CLEO @@ -31,12 +30,18 @@ namespace CLEO { if (m_bStarted) return; // already started - ConfigFilename = std::filesystem::current_path().append("cleo\\.cleo_config.ini").string(); + /*if (FS::current_path() != Filepath_Root) + { + MessageBox(NULL, "CLEO.asi has to be placed in game's root directory!", "CLEO error", MB_SYSTEMMODAL | MB_TOPMOST | MB_ICONERROR | MB_OK); + exit(1); // terminate the game + }*/ + + FS::create_directory(Filepath_Cleo); + FS::create_directory(FS::path(Filepath_Cleo).append("cleo_modules")); + FS::create_directory(FS::path(Filepath_Cleo).append("cleo_plugins")); + FS::create_directory(FS::path(Filepath_Cleo).append("cleo_saves")); + FS::create_directory(FS::path(Filepath_Cleo).append("cleo_text")); - CreateDirectory("cleo", NULL); - CreateDirectory("cleo/cleo_modules", NULL); - CreateDirectory("cleo/cleo_saves", NULL); - CreateDirectory("cleo/cleo_text", NULL); CodeInjector.OpenReadWriteAccess(); // must do this earlier to ensure plugins write access on init GameMenu.Inject(CodeInjector); DmaFix.Inject(CodeInjector); diff --git a/source/CleoBase.h b/source/CleoBase.h index 99831f23..e54181c4 100644 --- a/source/CleoBase.h +++ b/source/CleoBase.h @@ -35,8 +35,6 @@ namespace CLEO //CLegacy Legacy; HWND MainWnd; - std::string ConfigFilename; - CCleoInstance(); virtual ~CCleoInstance(); diff --git a/source/FileEnumerator.h b/source/FileEnumerator.h index a45cfc16..05b61eb2 100644 --- a/source/FileEnumerator.h +++ b/source/FileEnumerator.h @@ -6,7 +6,7 @@ void FilesWalk(const char* directory, const char* extension, T callback) { /*try { - for (auto& it : std::filesystem::directory_iterator(directory)) + for (auto& it : FS::directory_iterator(directory)) { if (it.is_regular_file()) { @@ -20,7 +20,7 @@ void 
FilesWalk(const char* directory, const char* extension, T callback) } } - auto result = std::filesystem::absolute(filePath); + auto result = FS::absolute(filePath); callback(result.string().c_str(), result.filename().string().c_str()); } } @@ -57,12 +57,12 @@ void FilesWalk(const char* directory, const char* extension, T callback) } std::string path; - if (std::filesystem::path(wfd.cFileName).is_absolute()) + if (FS::path(wfd.cFileName).is_absolute()) path = wfd.cFileName; // somebody hacked findFirstFile APIs and is providing us absolute path else path = pattern.substr(0, baseDirLen) + wfd.cFileName; // standard - auto result = std::filesystem::weakly_canonical(path); // will use CWD if input path was relative! + auto result = FS::weakly_canonical(path); // will use CWD if input path was relative! callback(result.string().c_str(), result.filename().string().c_str()); } while (FindNextFile(hSearch, &wfd)); diff --git a/source/Singleton.h b/source/Singleton.h index 76f9f34f..f07dfb8e 100644 --- a/source/Singleton.h +++ b/source/Singleton.h @@ -2,10 +2,12 @@ #include #include -class _CleoSingleton +static bool CleoSingletonChecked = false; + +// search for CLEO.asi modules loaded, terminate game if duplicate found +static void CleoSingletonCheck() { -public: - _CleoSingleton() + if(!CleoSingletonChecked) { MODULEENTRY32 module; module.dwSize = sizeof(MODULEENTRY32); @@ -25,7 +27,7 @@ class _CleoSingleton { CloseHandle(snapshot); MessageBox(NULL, "Another copy of CLEO.asi is already loaded!\nPlease remove duplicated files.", "CLEO error", MB_SYSTEMMODAL | MB_TOPMOST | MB_ICONERROR | MB_OK); - exit(1); + exit(1); // terminate the game break; } } @@ -33,6 +35,8 @@ class _CleoSingleton CloseHandle(snapshot); } + + CleoSingletonChecked = true; } -} CleoSingleton; +} diff --git a/source/dllmain.cpp b/source/dllmain.cpp index 8409f042..abdbd40b 100644 --- a/source/dllmain.cpp +++ b/source/dllmain.cpp @@ -1,5 +1,4 @@ #include "stdafx.h" -#include "Singleton.h" #include 
"CleoBase.h" #include "CDebug.h" diff --git a/source/stdafx.h b/source/stdafx.h index 8526306c..734c39af 100644 --- a/source/stdafx.h +++ b/source/stdafx.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -18,6 +19,25 @@ #include #include + +// global constant paths. Initialize before anything else +namespace FS = std::filesystem; + +static std::string GetApplicationDirectory() +{ + char buffer[512]; + GetModuleFileNameA(NULL, buffer, sizeof(buffer) - 1); // game exe absolute path + return FS::path(buffer).parent_path().string(); +} +static const std::string Filepath_Root = GetApplicationDirectory(); + +//static const std::string Filepath_Cleo = FS::path(Filepath_Root).append("cleo").string(); // absolute path +static const std::string Filepath_Cleo = "cleo"; // relative path - allow mod loaders to affect it + +static const std::string Filepath_Config = FS::path(Filepath_Cleo).append(".cleo_config.ini").string(); +static const std::string Filepath_Log = FS::path(Filepath_Cleo).append(".cleo.log").string(); + + #include #include #include From 4d03408a4323a7071d269af65e81e2dba0f38a93 Mon Sep 17 00:00:00 2001 From: Seemann Date: Wed, 8 Nov 2023 04:08:41 -0500 Subject: [PATCH 060/216] ensure eax is reset before calling a function (#24) --- source/CCustomOpcodeSystem.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 52361462..b2818b8f 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -1778,7 +1778,8 @@ namespace CLEO add ecx, 0x4 jmp loop_0AA5 loop_end_0AA5 : - call func + xor eax, eax + call func add esp, stackAlign } @@ -1837,7 +1838,8 @@ namespace CLEO add ecx, 0x4 jmp loop_0AA6 loop_end_0AA6 : - mov ecx, struc + mov ecx, struc + xor eax, eax call func add esp, stackAlign } @@ -1904,7 +1906,8 @@ namespace CLEO add ecx, 0x4 jmp loop_0AA7 loop_end_0AA7 : - call func + xor eax, eax + call func mov 
result, eax add esp, stackAlign } @@ -1973,7 +1976,8 @@ namespace CLEO add ecx, 0x4 jmp loop_0AA8 loop_end_0AA8 : - mov ecx, struc + mov ecx, struc + xor eax, eax call func mov result, eax add esp, stackAlign From 30df31bd7697f8d1271d2c52e3a10ca341c1aaa2 Mon Sep 17 00:00:00 2001 From: Miran Date: Wed, 8 Nov 2023 10:59:22 +0100 Subject: [PATCH 061/216] Argument count validation in 0AA5-0AA8 opcodes. --- source/CCustomOpcodeSystem.cpp | 75 +++++++++++++++++++++++----------- 1 file changed, 51 insertions(+), 24 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index b2818b8f..c65024fb 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -1732,10 +1732,17 @@ namespace CLEO { static char textParams[5][MAX_STR_LEN]; unsigned currTextParam = 0; static SCRIPT_VAR arguments[50] = { 0 }; - void(*func)(); - DWORD numParams; - DWORD stackAlign; - *thread >> func >> numParams >> stackAlign; + void(*func)(); *thread >> func; + DWORD numParams; *thread >> numParams; + DWORD stackAlign; *thread >> stackAlign; // pop + + auto nVarArg = GetVarArgCount(thread); + if (numParams != nVarArg) + { + SHOW_ERROR("Opcode [0AA5] declared %d input args, but provided %d in script %s\nScript suspended.", numParams, nVarArg, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } + if (numParams > (sizeof(arguments) / sizeof(SCRIPT_VAR))) numParams = sizeof(arguments) / sizeof(SCRIPT_VAR); stackAlign *= 4; SCRIPT_VAR *arguments_end = arguments + numParams; @@ -1792,11 +1799,18 @@ namespace CLEO { static char textParams[5][MAX_STR_LEN]; unsigned currTextParam = 0; static SCRIPT_VAR arguments[50] = { 0 }; - void(*func)(); - void *struc; - DWORD numParams; - DWORD stackAlign; - *thread >> func >> struc >> numParams >> stackAlign; + void(*func)(); *thread >> func; + void* struc; *thread >> struc; + DWORD numParams; *thread >> numParams; + DWORD stackAlign; *thread >> 
stackAlign; // pop + + auto nVarArg = GetVarArgCount(thread); + if (numParams != nVarArg) + { + SHOW_ERROR("Opcode [0AA6] declared %d input args, but provided %d in script %s\nScript suspended.", numParams, nVarArg, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } + if (numParams > (sizeof(arguments) / sizeof(SCRIPT_VAR))) numParams = sizeof(arguments) / sizeof(SCRIPT_VAR); stackAlign *= 4; SCRIPT_VAR *arguments_end = arguments + numParams; @@ -1848,19 +1862,26 @@ namespace CLEO return OR_CONTINUE; } - //0AA7=-1,call_function %1d% num_params %2h% pop %3h% + //0AA7=-1,call_function_return %1d% num_params %2h% pop %3h% OpcodeResult __stdcall opcode_0AA7(CRunningScript *thread) { - static char textParams[5][MAX_STR_LEN]; + static char textParams[5][MAX_STR_LEN]; DWORD currTextParam = 0; static SCRIPT_VAR arguments[50] = { 0 }; - DWORD currTextParam = 0; - void(*func)(); - DWORD numParams; - DWORD stackAlign; - *thread >> func >> numParams >> stackAlign; + void(*func)(); *thread >> func; + DWORD numParams; *thread >> numParams; + DWORD stackAlign; *thread >> stackAlign; // pop + + int nVarArg = GetVarArgCount(thread); + if (numParams + 1 != nVarArg) // and return argument + { + SHOW_ERROR("Opcode [0AA7] declared %d input args, but provided %d in script %s\nScript suspended.", numParams, (int)nVarArg - 1, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } + if (numParams > (sizeof(arguments) / sizeof(SCRIPT_VAR))) numParams = sizeof(arguments) / sizeof(SCRIPT_VAR); stackAlign *= 4; SCRIPT_VAR * arguments_end = arguments + numParams; + // retrieve parameters for (SCRIPT_VAR *arg = arguments; arg != arguments_end; ++arg) { @@ -1917,17 +1938,23 @@ namespace CLEO return OR_CONTINUE; } - //0AA8=-1,call_function_method %1d% struct %2d% num_params %3h% pop %4h% + //0AA8=-1,call_method_return %1d% struct %2d% num_params %3h% pop %4h% OpcodeResult __stdcall 
opcode_0AA8(CRunningScript *thread) { - static char textParams[5][MAX_STR_LEN]; + static char textParams[5][MAX_STR_LEN]; DWORD currTextParam = 0; static SCRIPT_VAR arguments[50] = { 0 }; - DWORD currTextParam = 0; - void(*func)(); - void *struc; - DWORD numParams; - DWORD stackAlign; - *thread >> func >> struc >> numParams >> stackAlign; + void(*func)(); *thread >> func; + void* struc; *thread >> struc; + DWORD numParams; *thread >> numParams; + DWORD stackAlign; *thread >> stackAlign; // pop + + int nVarArg = GetVarArgCount(thread); + if (numParams + 1 != nVarArg) // and return argument + { + SHOW_ERROR("Opcode [0AA8] declared %d input args, but provided %d in script %s\nScript suspended.", numParams, (int)nVarArg - 1, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } + if (numParams > (sizeof(arguments) / sizeof(SCRIPT_VAR))) numParams = sizeof(arguments) / sizeof(SCRIPT_VAR); stackAlign *= 4; SCRIPT_VAR *arguments_end = arguments + numParams; From d0a0cdc22a5d9bb3672df1f8dd639878a66d186f Mon Sep 17 00:00:00 2001 From: Miran Date: Wed, 8 Nov 2023 11:04:18 +0100 Subject: [PATCH 062/216] Readme updates. --- CHANGELOG.md | 6 +++++- README.md | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7466c1ff..dfafdc66 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -59,4 +59,8 @@ - configured automatic releases on GitHub - added setup_env.bat script -For older changes, see [CLEO4 changelog](https://github.com/cleolibrary/CLEO4/blob/master/CHANGELOG.md) +#### Special Thanks +- **123nir** for the alpha-testing, troubleshooting and valuable bug reports + +## Older +For previous changes, see [CLEO4 changelog](https://github.com/cleolibrary/CLEO4/blob/master/CHANGELOG.md) diff --git a/README.md b/README.md index 649f002f..52f5bb5f 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,7 @@ Special thanks to: - Stanislav Golovin (a.k.a. 
listener) for his great work in exploration of the GTA series. - NTAuthority and LINK/2012 for additional support with CLEO 4.3. - mfisto for the alpha-testing of CLEO 4, his support and advices. +- 123nir for the alpha-testing of CLEO 5.0.0, troubleshooting and valuable bug reports. The developers have no connection with Take 2 Interactive or Rockstar Games. By using this product or any of the additional products included you take your own personal responsibility for any negative consequences should they arise. From 6c0b3d27135bd5e6158dbd2c79856374ab660ca1 Mon Sep 17 00:00:00 2001 From: Miran Date: Sat, 11 Nov 2023 08:48:44 +0100 Subject: [PATCH 063/216] Fix crash in ScriptDeleteDelegate --- source/CCustomOpcodeSystem.cpp | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index c65024fb..e0372b79 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -6,6 +6,7 @@ #include "CTextManager.h" #include "CModelInfo.h" +#include #include #define OPCODE_VALIDATE_STR_ARG_READ(x) if((void*)x == nullptr) { SHOW_ERROR("%s in script %s \nScript suspended.", lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } @@ -131,7 +132,32 @@ namespace CLEO std::vector funcs; template void operator+=(FuncScriptDeleteDelegateT mFunc) { funcs.push_back(mFunc); } template void operator-=(FuncScriptDeleteDelegateT mFunc) { funcs.erase(std::remove(funcs.begin(), funcs.end(), mFunc), funcs.end()); } - void operator()(CRunningScript *script) { for (auto& f : funcs) f(script); } + void operator()(CRunningScript *script) + { + for (auto& f : funcs) + { + // check if function pointer lays within any of currently loaded modules (.asi or .cleo plugins) + void* ptr = f; + MODULEENTRY32 module; + module.dwSize = sizeof(MODULEENTRY32); + HANDLE snapshot = 
CreateToolhelp32Snapshot(TH32CS_SNAPMODULE, GetCurrentProcessId()); + Module32First(snapshot, &module); + if (snapshot != INVALID_HANDLE_VALUE) + { + size_t count = 0; + do + { + if(ptr >= module.modBaseAddr && ptr <= (module.modBaseAddr + module.modBaseSize)) + { + f(script); + break; + } + } while (Module32Next(snapshot, &module)); + CloseHandle(snapshot); + } + } + + } }; ScriptDeleteDelegate scriptDeleteDelegate; void RunScriptDeleteDelegate(CRunningScript *script) { scriptDeleteDelegate(script); } From 9185b9e3547cadd039e6080605878cbfabd1092c Mon Sep 17 00:00:00 2001 From: Miran Date: Sat, 11 Nov 2023 22:26:26 +0100 Subject: [PATCH 064/216] New opcode sign_extend --- CHANGELOG.md | 6 ++--- cleo_plugins/IntOperations/IntOperations.cpp | 28 ++++++++++++++++++-- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dfafdc66..285d290c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,8 @@ - 'argument count' parameter of **0AB1 (cleo_call)** is now optional. `cleo_call @LABEL args 0` can be written as `cleo_call @LABEL` - 'argument count' parameter of **0AB2 (cleo_return)** is now optional. `cleo_return 0` can be written as `cleo_return` - opcodes **0AAB**, **0AE4**, **0AE5**, **0AE6**, **0AE7** and **0AE8** moved to the [FileSystemOperations](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/FileSystemOperations) plugin + - SCM functions **(0AB1)** now keep their own GOSUB's call stack + - new opcode **0B1E ([sign_extend](https://library.sannybuilder.com/#/sa/bitwise/0B1E))** - changes in file operations - file paths can now use 'virtual absolute paths'. Use prefix in file path strings to access predefined locations: - `root:\` for _game root_ directory @@ -24,12 +26,10 @@ - `modules:\` for _CLEO\cleo_modules_ directory - rewritten opcode **0A99 (set_current_directory)**. 
It no longer affects internal game state and other scripts - improved error handling - - more detailed error messages in some scenarios + - more detailed error messages in multiple scenarios - some errors now cause the script to pause, instead of crashing the game -- SCM functions **(0AB1)** now keep their own GOSUB's call stack - updated included Silent's ASI Loader to version 1.3 - ### Bug Fixes - fixed error in **004E (terminate_this_script)** allowing to run multiple missions - fixed handling of strings longer than 128 characters causing errors in some cases diff --git a/cleo_plugins/IntOperations/IntOperations.cpp b/cleo_plugins/IntOperations/IntOperations.cpp index b14f436f..4b1a8f0f 100644 --- a/cleo_plugins/IntOperations/IntOperations.cpp +++ b/cleo_plugins/IntOperations/IntOperations.cpp @@ -27,6 +27,7 @@ class IntOperations CLEO_RegisterOpcode(0x0B1B, Scr_IntOp_MOD); CLEO_RegisterOpcode(0x0B1C, Scr_IntOp_SHR); CLEO_RegisterOpcode(0x0B1D, Scr_IntOp_SHL); + CLEO_RegisterOpcode(0x0B1E, Sign_Extend); } else { @@ -199,7 +200,7 @@ class IntOperations static OpcodeResult WINAPI Scr_IntOp_SHR(CScriptThread* thread) /**************************************************************** Opcode Format - 0B1C=3,%1d% >>= %2d% + 0B1C=2,%1d% >>= %2d% ****************************************************************/ { SCRIPT_VAR * op = CLEO_GetPointerToScriptVariable(thread); @@ -211,7 +212,7 @@ class IntOperations static OpcodeResult WINAPI Scr_IntOp_SHL(CScriptThread* thread) /**************************************************************** Opcode Format - 0B1D=3,%1d% <<= %2d% + 0B1D=2,%1d% <<= %2d% ****************************************************************/ { SCRIPT_VAR * op = CLEO_GetPointerToScriptVariable(thread); @@ -219,4 +220,27 @@ class IntOperations op->dwParam <<= val; return OR_CONTINUE; } + + static OpcodeResult WINAPI Sign_Extend(CScriptThread* thread) + /**************************************************************** + Opcode Format + 0B1E=2,sign_extend 
%1d% size %2d% + ****************************************************************/ + { + SCRIPT_VAR* op = CLEO_GetPointerToScriptVariable(thread); + int size = CLEO_GetIntOpcodeParam(thread); + + if (size > 0 && size < 4) + { + size_t offset = size * 8 - 1; // bit offset of top most bit in source value + bool signBit = op->dwParam & (1 << offset); + + if(signBit) + { + op->dwParam |= 0xFFFFFFFF << offset; // set all upper bits + } + } + + return OR_CONTINUE; + } } intOperations; From c46fd99f2d6ea7539880d132dd4ddd529a7cf336 Mon Sep 17 00:00:00 2001 From: Miran Date: Sat, 11 Nov 2023 04:26:24 +0100 Subject: [PATCH 065/216] Fixed support of provided custom work dir in path resolving. --- source/CScriptEngine.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 69ebf4c2..59720d36 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -690,7 +690,10 @@ namespace CLEO { if(fsPath.is_relative()) { - fsPath = GetWorkDir() / fsPath; + if(customWorkDir != nullptr) + fsPath = ResolvePath(customWorkDir) / fsPath; + else + fsPath = GetWorkDir() / fsPath; } return FS::weakly_canonical(fsPath).string(); @@ -737,10 +740,10 @@ namespace CLEO { std::ostringstream ss; - auto threadName = GetName(); + auto threadName = std::string(GetName(), GetName() + 8); // thread name might not be null terminated auto fileName = GetScriptFileName(); - if(memcmp(threadName, fileName, strlen(threadName)) != 0) // thread name no longer same as filename (was set with 03A4) + if(memcmp(threadName.c_str(), fileName, threadName.length()) != 0) // thread name no longer same as filename (was set with 03A4) { ss << "'" << threadName << "' from "; } From ecec1113115a0af04c0c16f705084650603c50b6 Mon Sep 17 00:00:00 2001 From: Miran Date: Sat, 11 Nov 2023 20:25:12 +0100 Subject: [PATCH 066/216] Fixed thread name unterminated string uses. 
--- cleo_sdk/CLEO.h | 2 +- source/CScriptEngine.cpp | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index 28ab1a41..a4ea1ed4 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -288,7 +288,7 @@ struct CRunningScript bool IsExternal() const { return bIsExternal; } bool IsMission() const { return bIsMission; } bool IsCustom() const { return bIsCustom; } // is this CLEO Script? - const char* GetName() const { return Name; } + std::string GetName() const { auto str = std::string(Name, Name + 8); str.resize(strlen(str.c_str())); return str; } // make sure it is always null terminated BYTE* GetBasePointer() const { return (BYTE*)BaseIP; } BYTE* GetBytePointer() const { return CurrentIP; } void SetIp(void* ip) { CurrentIP = (BYTE*)ip; } diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 59720d36..21003318 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -740,7 +740,7 @@ namespace CLEO { std::ostringstream ss; - auto threadName = std::string(GetName(), GetName() + 8); // thread name might not be null terminated + auto threadName = GetName(); auto fileName = GetScriptFileName(); if(memcmp(threadName.c_str(), fileName, threadName.length()) != 0) // thread name no longer same as filename (was set with 03A4) @@ -1139,7 +1139,7 @@ namespace CLEO { for (auto script = *activeThreadQueue; script; script = script->GetNext()) { - if (_stricmp(name, script->GetName()) == 0) + if (_stricmp(name, script->GetName().c_str()) == 0) return script; } return nullptr; @@ -1148,13 +1148,13 @@ namespace CLEO { if (CustomMission) { - if (_stricmp(name, CustomMission->Name) == 0) return CustomMission; + if (_stricmp(name, CustomMission->GetName().c_str()) == 0) return CustomMission; } for (auto it = CustomScripts.begin(); it != CustomScripts.end(); ++it) { auto cs = *it; - if (_stricmp(name, cs->Name) == 0) + if (_stricmp(name, cs->GetName().c_str()) == 0) return cs; } @@ 
-1182,12 +1182,12 @@ namespace CLEO { if (cs->IsMission()) { - TRACE("Registering custom mission named %s", cs->Name); + TRACE("Registering custom mission named %s", cs->GetName().c_str()); CustomMission = cs; } else { - TRACE("Registering custom script named %s", cs->Name); + TRACE("Registering custom script named %s", cs->GetName().c_str()); CustomScripts.push_back(cs); } AddScriptToQueue(cs, activeThreadQueue); @@ -1220,7 +1220,7 @@ namespace CLEO } if (cs == CustomMission) { - TRACE("Unregistering custom mission named %s", cs->Name); + TRACE("Unregistering custom mission named %s", cs->GetName().c_str()); RemoveScriptFromQueue(CustomMission, activeThreadQueue); ScriptsWaitingForDelete.push_back(cs); CustomMission->SetActive(false); @@ -1232,11 +1232,11 @@ namespace CLEO if (cs->bSaveEnabled) { InactiveScriptHashes.insert(cs->dwChecksum); - TRACE("Stopping custom script named %s", cs->Name); + TRACE("Stopping custom script named %s", cs->GetName().c_str()); } else { - TRACE("Unregistering custom script named %s", cs->Name); + TRACE("Unregistering custom script named %s", cs->GetName().c_str()); ScriptsWaitingForDelete.push_back(cs); } @@ -1257,7 +1257,7 @@ namespace CLEO { InactiveScriptHashes.clear(); std::for_each(CustomScripts.begin(), CustomScripts.end(), [this](CCustomScript *cs) { - TRACE("Unregistering custom script named %s", cs->Name); + TRACE("Unregistering custom script named %s", cs->GetName().c_str()); RemoveScriptFromQueue(cs, activeThreadQueue); //AddScriptToQueue(cs, inactiveThreadQueue); //if(cs->GetPrev()) cs->GetPrev()->SetNext(nullptr); @@ -1268,13 +1268,13 @@ namespace CLEO }); CustomScripts.clear(); std::for_each(ScriptsWaitingForDelete.begin(), ScriptsWaitingForDelete.end(), [this](CCustomScript *cs) { - TRACE("Deleting inactive script named %s", cs->Name); + TRACE("Deleting inactive script named %s", cs->GetName().c_str()); delete cs; }); ScriptsWaitingForDelete.clear(); if (CustomMission) { - TRACE("Unregistering custom mission named 
%s", CustomMission->Name); + TRACE("Unregistering custom mission named %s", CustomMission->GetName().c_str()); RemoveScriptFromQueue(CustomMission, activeThreadQueue); CustomMission->SetActive(false); delete CustomMission; From a137881a21c0633aaf50d03e7ef5d4631942c4cb Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sun, 12 Nov 2023 03:24:13 +0100 Subject: [PATCH 067/216] Argument count mismatch validation in 0AB2 split into critical and non critical cases. (#27) --- cleo_sdk/CLEO.h | 1 + source/CCustomOpcodeSystem.cpp | 46 ++++++++++++++++++++-------------- source/CDebug.cpp | 18 +++++++++++++ source/CDebug.h | 10 ++++++-- source/CModuleSystem.cpp | 20 +++++++-------- source/CPluginSystem.h | 8 +++--- source/CScriptEngine.cpp | 4 +-- source/CSoundSystem.cpp | 8 +++--- source/CTextManager.cpp | 4 +-- 9 files changed, 76 insertions(+), 43 deletions(-) diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index a4ea1ed4..1c1482f6 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -27,6 +27,7 @@ enum eCLEO_Version : DWORD CLEO_VER_4_3 = 0x04030000, CLEO_VER_4_4 = 0x04040000, CLEO_VER_4 = CLEO_VER_4_4, + CLEO_VER_5 = 0x05000000, CLEO_VER_CUR = CLEO_VERSION }; diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index e0372b79..0b450503 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -477,7 +477,7 @@ namespace CLEO CustomOpcodeHandler& dst = customOpcodeProc[opcode]; if (*dst != nullptr) { - LOG_WARNING("Opcode [%04X] already registered! Replacing...", opcode); + LOG_WARNING(0, "Opcode [%04X] already registered! 
Replacing...", opcode); } dst = callback; @@ -501,7 +501,7 @@ namespace CLEO break; default: - LOG_WARNING("Reading integer argument, got %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); + LOG_WARNING(&thread, "Reading integer argument, got %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); } GetScriptParams(&thread, 1); @@ -525,7 +525,7 @@ namespace CLEO break; default: - LOG_WARNING("Writing integer, got argument type %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); + LOG_WARNING(&thread, "Writing integer, got argument type %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); } opcodeParams[0].dwParam = uval; @@ -549,7 +549,7 @@ namespace CLEO break; default: - LOG_WARNING("Reading integer argument, got %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); + LOG_WARNING(&thread, "Reading integer argument, got %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); } GetScriptParams(&thread, 1); @@ -573,7 +573,7 @@ namespace CLEO break; default: - LOG_WARNING("Writing integer, got argument type %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); + LOG_WARNING(&thread, "Writing integer, got argument type %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); } opcodeParams[0].nParam = nval; @@ -594,7 +594,7 @@ namespace CLEO break; default: - LOG_WARNING("Reading float argument, got %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); + LOG_WARNING(&thread, "Reading float argument, got %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); } GetScriptParams(&thread, 1); @@ -614,7 +614,7 @@ namespace CLEO break; default: - LOG_WARNING("Writing float, got argument type %s in script %s", 
ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); + LOG_WARNING(&thread, "Writing float, got argument type %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); } opcodeParams[0].fParam = fval; @@ -1020,7 +1020,7 @@ namespace CLEO if (CLEO_GetOperandType(thread) != DT_END) { lastErrorMsg = "More params than slots in formatted string"; - LOG_WARNING("%s in script %s", lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); + LOG_WARNING(thread, "%s in script %s", lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); } SkipUnusedVarArgs(thread); // skip terminator too @@ -1469,7 +1469,7 @@ namespace CLEO { if (cs) delete cs; SkipUnusedVarArgs(thread); - LOG_WARNING("Failed to load script '%s' in script ", filename.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); + LOG_WARNING(0, "Failed to load script '%s' in script ", filename.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); } return OR_CONTINUE; @@ -1481,7 +1481,7 @@ namespace CLEO CCustomScript *cs = reinterpret_cast(thread); if (thread->IsMission() || !cs->IsCustom()) { - LOG_WARNING("Incorrect usage of opcode [0A93] in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); + LOG_WARNING(0, "Incorrect usage of opcode [0A93] in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); return OR_CONTINUE; } @@ -1514,7 +1514,7 @@ namespace CLEO { if (cs) delete cs; SkipUnusedVarArgs(thread); - LOG_WARNING("[0A94] Failed to load mission '%s' from script '%s'.", filename.c_str(), thread->GetName()); + LOG_WARNING(0, "[0A94] Failed to load mission '%s' from script '%s'.", filename.c_str(), thread->GetName()); } return OR_CONTINUE; @@ -1576,7 +1576,7 @@ namespace CLEO case 1: path = DIR_USER; break; case 2: path = DIR_SCRIPT; break; default: - LOG_WARNING("Value (%d) not known by opcode [0A99] in script %s", param, ((CCustomScript*)thread)->GetInfoStr().c_str()); + LOG_WARNING(0, "Value (%d) not known by 
opcode [0A99] in script %s", param, ((CCustomScript*)thread)->GetInfoStr().c_str()); return OR_CONTINUE; } @@ -2089,7 +2089,7 @@ namespace CLEO case 2: stream->Pause(); break; case 3: stream->Resume(); break; default: - LOG_WARNING("[0AAD] Unknown audiostream's action (%d) in script %s", action, ((CCustomScript*)thread)->GetInfoStr().c_str()); + LOG_WARNING(thread, "[0AAD] Unknown audiostream's action (%d) in script %s", action, ((CCustomScript*)thread)->GetInfoStr().c_str()); } } return OR_CONTINUE; @@ -2330,11 +2330,15 @@ namespace CLEO return CCustomOpcodeSystem::ErrorSuspendScript(thread); } - if(returnParamCount - 1 != declaredParamCount) // minus 'num args' itself + if(returnParamCount - 1 < declaredParamCount) // minus 'num args' itself { SHOW_ERROR("Opcode [0AB2] declared %d return args, but provided %d in script %s\nScript suspended.", declaredParamCount, returnParamCount - 1, ((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } + else if (returnParamCount - 1 > declaredParamCount) // more args than needed, not critical + { + LOG_WARNING(thread, "Opcode [0AB2] declared %d return args, but provided %d in script %s", declaredParamCount, returnParamCount - 1, ((CCustomScript*)thread)->GetInfoStr().c_str()); + } } if (returnParamCount) GetScriptParams(thread, returnParamCount); @@ -2344,11 +2348,15 @@ namespace CLEO DWORD returnSlotCount = GetVarArgCount(thread); if(returnParamCount) returnParamCount--; // do not count the 'num args' argument itself - if (returnSlotCount != returnParamCount) + if (returnSlotCount > returnParamCount) { SHOW_ERROR("Opcode [0AB2] returned %d params, while function caller expected %d in script %s\nScript suspended.", returnParamCount, returnSlotCount, ((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } + else if (returnSlotCount < returnParamCount) // more args than needed, not critical + { + LOG_WARNING(thread, "Opcode 
[0AB2] returned %d params, while function caller expected %d in script %s", returnParamCount, returnSlotCount, ((CCustomScript*)thread)->GetInfoStr().c_str()); + } if (returnSlotCount) SetScriptParams(thread, returnSlotCount); thread->IncPtr(); // skip var args terminator @@ -3270,7 +3278,7 @@ extern "C" auto result = ReadStringParam(thread, buf, size); if (result == nullptr) - LOG_WARNING("%s in script %s", lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); + LOG_WARNING(thread, "%s in script %s", lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return result; } @@ -3278,7 +3286,7 @@ extern "C" void WINAPI CLEO_WriteStringOpcodeParam(CLEO::CRunningScript* thread, const char* str) { if(!WriteStringParam(thread, str)) - LOG_WARNING("%s in script %s", lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); + LOG_WARNING(thread, "%s in script %s", lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); } char* WINAPI CLEO_ReadParamsFormatted(CLEO::CRunningScript* thread, const char* format, char* buf, int bufSize) @@ -3289,7 +3297,7 @@ extern "C" if(ReadFormattedString(thread, buf, bufSize, format) == -1) // error? { - LOG_WARNING("%s in script %s", lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); + LOG_WARNING(thread, "%s in script %s", lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return nullptr; // error } @@ -3422,7 +3430,7 @@ extern "C" { if (cs) delete cs; if (fromThread) SkipUnusedVarArgs(fromThread); - LOG_WARNING("Failed to load script '%s'.", script_name); + LOG_WARNING(0, "Failed to load script '%s'.", script_name); return nullptr; } diff --git a/source/CDebug.cpp b/source/CDebug.cpp index 3dc8d445..bc2202c9 100644 --- a/source/CDebug.cpp +++ b/source/CDebug.cpp @@ -31,6 +31,24 @@ void CDebug::Trace(eLogLevel level, const char* format, ...) 
va_end(args); } +void CDebug::Trace(const CLEO::CRunningScript* thread, CLEO::eLogLevel level, const char* format, ...) +{ + if(thread != nullptr && thread->IsCustom()) + { + const auto cs = (CCustomScript*)thread; + + if(cs->GetCompatibility() < CLEO_VER_5) + { + return; // do not log this in older versions + } + } + + va_list args; + va_start(args, format); + TraceVArg(level, format, args); + va_end(args); +} + const char* CDebug::TraceVArg(CLEO::eLogLevel level, const char* format, va_list args) { std::lock_guard guard(mutex); diff --git a/source/CDebug.h b/source/CDebug.h index 517850b0..52a06d7d 100644 --- a/source/CDebug.h +++ b/source/CDebug.h @@ -1,12 +1,17 @@ #pragma once #include -#define TRACE(a,...) {Debug.Trace(CLEO::eLogLevel::Default, a, __VA_ARGS__);} -#define LOG_WARNING(a,...) {Debug.Trace(CLEO::eLogLevel::Error, a, __VA_ARGS__);} +#define TRACE(format,...) {Debug.Trace(CLEO::eLogLevel::Default, format, __VA_ARGS__);} +#define LOG_WARNING(script, format, ...) {Debug.Trace(script, CLEO::eLogLevel::Error, format, __VA_ARGS__);} #define SHOW_ERROR(a,...) 
{Debug.Error(a, __VA_ARGS__);} std::string stringPrintf(const char* format, ...); +namespace CLEO +{ + class CRunningScript; +} + class CDebug { public: @@ -27,6 +32,7 @@ class CDebug } void Trace(CLEO::eLogLevel level, const char* format, ...); + void Trace(const CLEO::CRunningScript* thread, CLEO::eLogLevel level, const char* format, ...); void Error(const char* format, ...); private: diff --git a/source/CModuleSystem.cpp b/source/CModuleSystem.cpp index 9bb6267d..6b99a18c 100644 --- a/source/CModuleSystem.cpp +++ b/source/CModuleSystem.cpp @@ -207,7 +207,7 @@ bool CModuleSystem::CModule::LoadFromFile(const char* path) std::ifstream file(path, std::ios::binary); if (!file.good()) { - LOG_WARNING("Failed to open module file '%s'", path); + LOG_WARNING(0, "Failed to open module file '%s'", path); return false; } @@ -232,7 +232,7 @@ bool CModuleSystem::CModule::LoadFromFile(const char* path) file.read((char*)&segment, sizeof(segment)); if (file.fail()) { - LOG_WARNING("Module '%s' file header read error", path); + LOG_WARNING(0, "Module '%s' file header read error", path); return false; } @@ -241,7 +241,7 @@ bool CModuleSystem::CModule::LoadFromFile(const char* path) segment.jumpAddress >= 0 || // jump labels should be negative values std::memcmp(segment.magic, Segment_Magic, sizeof(Segment_Magic)) != 0) // not a custom header { - LOG_WARNING("Module '%s' load error. Custom segment not present", path); + LOG_WARNING(0, "Module '%s' load error. Custom segment not present", path); return false; } segment.jumpAddress = abs(segment.jumpAddress); // turn label into actual file offset @@ -262,7 +262,7 @@ bool CModuleSystem::CModule::LoadFromFile(const char* path) if (file.fail() || file.tellg() > segment.jumpAddress) // read past the segment end { - LOG_WARNING("Module '%s' load error. Invalid custom header", path); + LOG_WARNING(0, "Module '%s' load error. 
Invalid custom header", path); return false; } @@ -274,7 +274,7 @@ bool CModuleSystem::CModule::LoadFromFile(const char* path) { if (headerEndPos > segment.jumpAddress) { - LOG_WARNING("Module '%s' load error. Invalid size of exports header", path); + LOG_WARNING(0, "Module '%s' load error. Invalid size of exports header", path); return false; } @@ -288,11 +288,11 @@ bool CModuleSystem::CModule::LoadFromFile(const char* path) { if (e.name.empty()) { - LOG_WARNING("Module '%s' export load error.", path); + LOG_WARNING(0, "Module '%s' export load error.", path); } else { - LOG_WARNING("Module's '%s' export '%s' load error.", path, e.name.c_str()); + LOG_WARNING(0, "Module's '%s' export '%s' load error.", path, e.name.c_str()); } return false; } @@ -311,7 +311,7 @@ bool CModuleSystem::CModule::LoadFromFile(const char* path) file.seekg(headerEndPos, file.beg); if (file.fail()) { - LOG_WARNING("Module '%s' load error. Error while skipping unknown header type", path); + LOG_WARNING(0, "Module '%s' load error. Error while skipping unknown header type", path); return false; } } @@ -319,13 +319,13 @@ bool CModuleSystem::CModule::LoadFromFile(const char* path) if (!file.good()) { - LOG_WARNING("Module '%s' read error", path); + LOG_WARNING(0, "Module '%s' read error", path); return false; } if (!result) // no usable elements found. No point to keeping this module { - LOG_WARNING("Module '%s' skipped. Nothing found", path); + LOG_WARNING(0, "Module '%s' skipped. Nothing found", path); return false; } diff --git a/source/CPluginSystem.h b/source/CPluginSystem.h index eb183bbf..8a071eb4 100644 --- a/source/CPluginSystem.h +++ b/source/CPluginSystem.h @@ -32,7 +32,7 @@ namespace CLEO HMODULE hlib = LoadLibrary(fullPath); if (!hlib) { - LOG_WARNING("Error loading plugin '%s'", fullPath); + LOG_WARNING(0, "Error loading plugin '%s'", fullPath); } else { @@ -42,7 +42,7 @@ namespace CLEO } else { - LOG_WARNING("Plugin `%s` already loaded. 
Skipping '%s'", name.c_str(), fullPath); + LOG_WARNING(0, "Plugin `%s` already loaded. Skipping '%s'", name.c_str(), fullPath); } }); @@ -58,7 +58,7 @@ namespace CLEO HMODULE hlib = LoadLibrary(fullPath); if (!hlib) { - LOG_WARNING("Error while loading plugin '%s'", fullPath); + LOG_WARNING(0, "Error while loading plugin '%s'", fullPath); } else { @@ -68,7 +68,7 @@ namespace CLEO } else { - LOG_WARNING("Plugin `%s` already loaded. Skipping '%s'", name.c_str(), fullPath); + LOG_WARNING(0, "Plugin `%s` already loaded. Skipping '%s'", name.c_str(), fullPath); } }); } diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 21003318..44192259 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -1372,11 +1372,11 @@ namespace CLEO } catch (std::exception& e) { - LOG_WARNING("Error during loading of custom script %s occured.\nError message: %s", szFileName, e.what()); + LOG_WARNING(0, "Error during loading of custom script %s occured.\nError message: %s", szFileName, e.what()); } catch (...) { - LOG_WARNING("Unknown error during loading of custom script %s occured.", szFileName); + LOG_WARNING(0, "Unknown error during loading of custom script %s occured.", szFileName); } } diff --git a/source/CSoundSystem.cpp b/source/CSoundSystem.cpp index e54501bf..1705951a 100644 --- a/source/CSoundSystem.cpp +++ b/source/CSoundSystem.cpp @@ -13,7 +13,7 @@ namespace CLEO HWND OnCreateMainWindow(HINSTANCE hinst) { - if (HIWORD(BASS_GetVersion()) != BASSVERSION) LOG_WARNING("An incorrect version of bass.dll has been loaded"); + if (HIWORD(BASS_GetVersion()) != BASSVERSION) LOG_WARNING(0, "An incorrect version of bass.dll has been loaded"); TRACE("Creating main window..."); auto mainWnd = CreateMainWindow(hinst); if (!GetInstance().SoundSystem.Init(mainWnd)) SHOW_ERROR("CSoundSystem::Init() failed. 
Error code: %d", BASS_ErrorGetCode()); @@ -132,7 +132,7 @@ namespace CLEO BASS_Apply3D(); return true; } - LOG_WARNING("Could not initialize BASS sound system"); + LOG_WARNING(0, "Could not initialize BASS sound system"); return false; } @@ -236,7 +236,7 @@ namespace CLEO if (!(streamInternal = BASS_StreamCreateFile(FALSE, src, 0, 0, flags)) && !(streamInternal = BASS_StreamCreateURL(src, 0, flags, 0, nullptr))) { - LOG_WARNING("Loading audiostream %s failed. Error code: %d", src, BASS_ErrorGetCode()); + LOG_WARNING(0, "Loading audiostream %s failed. Error code: %d", src, BASS_ErrorGetCode()); } else OK = true; } @@ -254,7 +254,7 @@ namespace CLEO if (!(streamInternal = BASS_StreamCreateFile(FALSE, src, 0, 0, flags)) && !(streamInternal = BASS_StreamCreateURL(src, 0, flags, nullptr, nullptr))) { - LOG_WARNING("Loading 3d-audiostream %s failed. Error code: %d", src, BASS_ErrorGetCode()); + LOG_WARNING(0, "Loading 3d-audiostream %s failed. Error code: %d", src, BASS_ErrorGetCode()); } else { diff --git a/source/CTextManager.cpp b/source/CTextManager.cpp index 9030e5dc..8517c008 100644 --- a/source/CTextManager.cpp +++ b/source/CTextManager.cpp @@ -115,7 +115,7 @@ namespace CLEO } catch (std::exception& ex) { - LOG_WARNING("Loading of FXT file '%s' failed: \n%s", fullPath, ex.what()); + LOG_WARNING(0, "Loading of FXT file '%s' failed: \n%s", fullPath, ex.what()); } }); } @@ -137,7 +137,7 @@ namespace CLEO { if (!dynamic || fxt->second->is_static) { - LOG_WARNING("Attempting to add FXT \'%s\' - FAILED (GXT conflict)", key, value); + LOG_WARNING(0, "Attempting to add FXT \'%s\' - FAILED (GXT conflict)", key, value); return false; } From 26d93da1479ae002453356f7ea143fd9e87ec8de Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sun, 12 Nov 2023 05:21:10 +0100 Subject: [PATCH 068/216] Bug fix of thread name print. 
(#31) --- source/CCustomOpcodeSystem.cpp | 6 +++--- source/CScriptEngine.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 0b450503..95e4bb7c 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -1455,7 +1455,7 @@ namespace CLEO auto path = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(path) auto filename = reinterpret_cast(thread)->ResolvePath(path, DIR_CLEO); // legacy: default search location is game\cleo directory - TRACE("[0A92] Starting new custom script %s from thread named %s", filename.c_str(), thread->GetName()); + TRACE("[0A92] Starting new custom script %s from thread named %s", filename.c_str(), thread->GetName().c_str()); auto cs = new CCustomScript(filename.c_str()); SetScriptCondResult(thread, cs && cs->IsOK()); @@ -1496,7 +1496,7 @@ namespace CLEO auto filename = reinterpret_cast(thread)->ResolvePath(path, DIR_CLEO); // legacy: default search location is game\cleo directory filename += ".cm"; // add custom mission extension - TRACE("[0A94] Starting new custom mission %s from thread named %s", filename.c_str(), thread->GetName()); + TRACE("[0A94] Starting new custom mission %s from thread named %s", filename.c_str(), thread->GetName().c_str()); auto cs = new CCustomScript(filename.c_str(), true); SetScriptCondResult(thread, cs && cs->IsOK()); @@ -1514,7 +1514,7 @@ namespace CLEO { if (cs) delete cs; SkipUnusedVarArgs(thread); - LOG_WARNING(0, "[0A94] Failed to load mission '%s' from script '%s'.", filename.c_str(), thread->GetName()); + LOG_WARNING(0, "[0A94] Failed to load mission '%s' from script '%s'.", filename.c_str(), thread->GetName().c_str()); } return OR_CONTINUE; diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 44192259..6b01570d 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -252,7 +252,7 @@ namespace CLEO { if (!pScript->IsMission()) { - TRACE("Incorrect usage 
of opcode [004E] in script %s.", pScript->GetName()); + TRACE("Incorrect usage of opcode [004E] in script %s.", pScript->GetName().c_str()); } else *MissionLoaded = false; GetInstance().ScriptEngine.RemoveCustomScript(pScript); From a03ca230380312e206c01347911ff2e32f96292d Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sun, 12 Nov 2023 17:09:43 +0100 Subject: [PATCH 069/216] Inheriting compatibility mode from parent script. Deducing compatibility mode from script file extension. (#32) --- source/CCustomOpcodeSystem.cpp | 6 +-- source/CScriptEngine.cpp | 80 ++++++++++++++++++++++++++-------- source/CScriptEngine.h | 2 +- 3 files changed, 66 insertions(+), 22 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 95e4bb7c..96342dd0 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -1457,7 +1457,7 @@ namespace CLEO auto filename = reinterpret_cast(thread)->ResolvePath(path, DIR_CLEO); // legacy: default search location is game\cleo directory TRACE("[0A92] Starting new custom script %s from thread named %s", filename.c_str(), thread->GetName().c_str()); - auto cs = new CCustomScript(filename.c_str()); + auto cs = new CCustomScript(filename.c_str(), false, thread); SetScriptCondResult(thread, cs && cs->IsOK()); if (cs && cs->IsOK()) { @@ -1498,7 +1498,7 @@ namespace CLEO filename += ".cm"; // add custom mission extension TRACE("[0A94] Starting new custom mission %s from thread named %s", filename.c_str(), thread->GetName().c_str()); - auto cs = new CCustomScript(filename.c_str(), true); + auto cs = new CCustomScript(filename.c_str(), true, thread); SetScriptCondResult(thread, cs && cs->IsOK()); if (cs && cs->IsOK()) { @@ -3415,7 +3415,7 @@ extern "C" } // if "label == 0" then "script_name" need to be the file name - auto cs = new CCustomScript(script_name, false, reinterpret_cast(fromThread), label); + auto cs = new CCustomScript(script_name, false, fromThread, label); if (fromThread) 
SetScriptCondResult(fromThread, cs && cs->IsOK()); if (cs && cs->IsOK()) { diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 6b01570d..936d0818 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -1302,7 +1302,7 @@ namespace CLEO } // TODO: Consider split into 2 classes: CCustomExternalScript, CCustomChildScript - CCustomScript::CCustomScript(const char *szFileName, bool bIsMiss, CCustomScript *parent, int label) + CCustomScript::CCustomScript(const char *szFileName, bool bIsMiss, CRunningScript *parent, int label) : CRunningScript(), bSaveEnabled(false), bOK(false), LastSearchPed(0), LastSearchCar(0), LastSearchObj(0), CompatVer(CLEO_VER_CUR) @@ -1318,10 +1318,39 @@ namespace CLEO // store script file directory and name FS::path path = szFileName; path = FS::weakly_canonical(path); + + // deduce compatibility mode from filetype extension + if (path.extension() == cs4_ext) + CompatVer = CLEO_VER_4; + else + if (path.extension() == cs3_ext) + CompatVer = CLEO_VER_3; + + if(CompatVer == CLEO_VER_CUR && parent != nullptr && parent-IsCustom()) + { + // inherit compatibility mode from parent + CompatVer = ((CCustomScript*)parent)->GetCompatibility(); + + // try loading file with same compatibility mode filetype extension + auto compatPath = path; + if(CompatVer == CLEO_VER_4) + { + compatPath.replace_extension(cs4_ext); + if(FS::is_regular_file(compatPath)) + path = compatPath; + } + else + if (CompatVer == CLEO_VER_3) + { + compatPath.replace_extension(cs3_ext); + if (FS::is_regular_file(compatPath)) + path = compatPath; + } + } + scriptFileDir = path.parent_path().string(); scriptFileName = path.filename().string(); - - workDir = Filepath_Root; + workDir = Filepath_Root; // game root try { @@ -1331,17 +1360,22 @@ namespace CLEO if (!parent) throw std::logic_error("Trying to create external thread from label without parent thread"); - BaseIP = parent->GetBasePointer(); - CurrentIP = parent->GetBasePointer() - label; - 
memcpy(Name, parent->Name, sizeof(Name)); - dwChecksum = parent->dwChecksum; - parentThread = parent; - parent->childThreads.push_back(this); + if (!parent->IsCustom()) + throw std::logic_error("Only custom threads can spawn children threads from label"); + + auto cs = (CCustomScript*)parent; + + BaseIP = cs->GetBasePointer(); + CurrentIP = cs->GetBasePointer() - label; + memcpy(Name, cs->Name, sizeof(Name)); + dwChecksum = cs->dwChecksum; + parentThread = cs; + cs->childThreads.push_back(this); } else { using std::ios; - std::ifstream is(szFileName, std::ios::binary); + std::ifstream is(path.string().c_str(), std::ios::binary); is.exceptions(std::ios::badbit | std::ios::failbit); std::size_t length; is.seekg(0, std::ios::end); @@ -1360,23 +1394,33 @@ namespace CLEO } is.read(reinterpret_cast(BaseIP), length); - auto fname = strrchr(szFileName, '\\') + 1; - if (!fname) fname = strrchr(szFileName, '/') + 1; - if (fname < szFileName) fname = szFileName; - memcpy(Name, fname, sizeof(Name)); - Name[7] = '\0'; - dwChecksum = crc32(reinterpret_cast(BaseIP), length); + dwChecksum = crc32(reinterpret_cast(BaseIP), length); + + // thread name from filename + auto threadNamePath = path; + if(threadNamePath.extension() == cs3_ext || threadNamePath.extension() == cs4_ext) + { + threadNamePath.replace_extension(cs_ext); // keep original extension even in compatibility modes + } + auto fName = threadNamePath.filename().string(); + + memset(Name, '\0', sizeof(Name)); + if(!fName.empty()) + { + auto len = min(fName.length(), sizeof(Name) - 1); // and text terminator + memcpy(Name, fName.c_str(), len); + } } lastScriptCreated = this; bOK = true; } catch (std::exception& e) { - LOG_WARNING(0, "Error during loading of custom script %s occured.\nError message: %s", szFileName, e.what()); + LOG_WARNING(0, "Error during loading of custom script %s occured.\nError message: %s", path.string().c_str(), e.what()); } catch (...) 
{ - LOG_WARNING(0, "Unknown error during loading of custom script %s occured.", szFileName); + LOG_WARNING(0, "Unknown error during loading of custom script %s occured.", path.string().c_str()); } } diff --git a/source/CScriptEngine.h b/source/CScriptEngine.h index db9eb44d..ea4e5369 100644 --- a/source/CScriptEngine.h +++ b/source/CScriptEngine.h @@ -55,7 +55,7 @@ namespace CLEO inline DWORD& GetLastSearchPed() { return LastSearchPed; } inline DWORD& GetLastSearchVehicle() { return LastSearchCar; } inline DWORD& GetLastSearchObject() { return LastSearchObj; } - CCustomScript(const char *szFileName, bool bIsMiss = false, CCustomScript *parent = nullptr, int label = 0); + CCustomScript(const char *szFileName, bool bIsMiss = false, CRunningScript *parent = nullptr, int label = 0); ~CCustomScript(); void Process(); From 9fa345f898addb7b82fc825b58c7bbd1d456a822 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sun, 12 Nov 2023 17:36:36 +0100 Subject: [PATCH 070/216] Fixed missing dependency (#33) --- cleo_sdk/CLEO.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index 1c1482f6..5a02ca02 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -4,6 +4,9 @@ */ #pragma once +#ifdef __cplusplus +#include +#endif #include #define CLEO_VERSION_MAIN 5 From 8c9f17e593510edce8139d7c1a3cd03414601197 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Mon, 13 Nov 2023 20:10:24 +0100 Subject: [PATCH 071/216] New opcode cleo return with (#39) --- CHANGELOG.md | 2 ++ source/CCustomOpcodeSystem.cpp | 47 ++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 285d290c..efb4f683 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ - **0DD5 ([get_game_platform](https://library.sannybuilder.com/#/sa/CLEO/0DD5))** - **2000 ([resolve_filepath](https://library.sannybuilder.com/#/sa/CLEO/2000))** - **2001 ([get_script_filename](https://library.sannybuilder.com/#/sa/CLEO/2001))** + - **2002 
([cleo_return_with](https://library.sannybuilder.com/#/sa/CLEO/2002))** + - **2003 ([cleo_return_false](https://library.sannybuilder.com/#/sa/CLEO/2003))** - 'argument count' parameter of **0AB1 (cleo_call)** is now optional. `cleo_call @LABEL args 0` can be written as `cleo_call @LABEL` - 'argument count' parameter of **0AB2 (cleo_return)** is now optional. `cleo_return 0` can be written as `cleo_return` - opcodes **0AAB**, **0AE4**, **0AE5**, **0AE6**, **0AE7** and **0AE8** moved to the [FileSystemOperations](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/FileSystemOperations) plugin diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 96342dd0..f634ce50 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -126,6 +126,8 @@ namespace CLEO OpcodeResult __stdcall opcode_0DD5(CRunningScript* thread); // get_platform OpcodeResult __stdcall opcode_2000(CRunningScript* thread); // resolve_filepath OpcodeResult __stdcall opcode_2001(CRunningScript* thread); // get_script_filename + OpcodeResult __stdcall opcode_2002(CRunningScript* thread); // cleo_return_with + OpcodeResult __stdcall opcode_2003(CRunningScript* thread); // cleo_return_false typedef void(*FuncScriptDeleteDelegateT) (CRunningScript *script); struct ScriptDeleteDelegate { @@ -400,6 +402,8 @@ namespace CLEO CLEO_RegisterOpcode(0x0DD5, opcode_0DD5); // get_platform CLEO_RegisterOpcode(0x2000, opcode_2000); // resolve_filepath CLEO_RegisterOpcode(0x2001, opcode_2001); // get_script_filename + CLEO_RegisterOpcode(0x2002, opcode_2002); // cleo_return_with + CLEO_RegisterOpcode(0x2002, opcode_2003); // cleo_return_false } void CCustomOpcodeSystem::Inject(CCodeInjector& inj) @@ -3220,6 +3224,49 @@ namespace CLEO SetScriptCondResult(thread, true); return OR_CONTINUE; } + + //2002=-1, cleo_return_with ... 
+ OpcodeResult __stdcall opcode_2002(CRunningScript* thread) + { + auto cs = reinterpret_cast(thread); + DWORD returnParamCount = GetVarArgCount(cs); + + if (returnParamCount) GetScriptParams(cs, returnParamCount); + + ScmFunction* scmFunc = ScmFunction::Store[cs->GetScmFunction()]; + scmFunc->Return(cs); // jump back to cleo_call, right after last input param. Return slot var args starts here + if (scmFunc->moduleExportRef != nullptr) GetInstance().ModuleSystem.ReleaseModuleRef((char*)scmFunc->moduleExportRef); // exiting export - release module + delete scmFunc; + + DWORD returnSlotCount = GetVarArgCount(cs); + if(returnParamCount != returnSlotCount) // new CLEO5 opcode, strict error checks + { + SHOW_ERROR("Opcode [2002] returned %d params, while function caller expected %d in script %s\nScript suspended.", returnParamCount, returnSlotCount, cs->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(cs); + } + + if (returnSlotCount) SetScriptParams(cs, returnSlotCount); + cs->IncPtr(); // skip var args + + SetScriptCondResult(cs, true); + return OR_CONTINUE; + } + + //2003=0, cleo_return_false + OpcodeResult __stdcall opcode_2003(CRunningScript* thread) + { + auto cs = reinterpret_cast(thread); + + ScmFunction* scmFunc = ScmFunction::Store[cs->GetScmFunction()]; + scmFunc->Return(cs); // jump back to cleo_call, right after last input param. 
Return slot var args starts here + if (scmFunc->moduleExportRef != nullptr) GetInstance().ModuleSystem.ReleaseModuleRef((char*)scmFunc->moduleExportRef); // exiting export - release module + delete scmFunc; + + SkipUnusedVarArgs(thread); // just exit without change of return params + + SetScriptCondResult(cs, false); + return OR_CONTINUE; + } } From 19389e7171705f46196c5c61ab76164b342cd4d1 Mon Sep 17 00:00:00 2001 From: Seemann Date: Tue, 14 Nov 2023 20:06:32 -0500 Subject: [PATCH 072/216] fix opcode id typo (#40) --- source/CCustomOpcodeSystem.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index f634ce50..99f1a9cd 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -403,7 +403,7 @@ namespace CLEO CLEO_RegisterOpcode(0x2000, opcode_2000); // resolve_filepath CLEO_RegisterOpcode(0x2001, opcode_2001); // get_script_filename CLEO_RegisterOpcode(0x2002, opcode_2002); // cleo_return_with - CLEO_RegisterOpcode(0x2002, opcode_2003); // cleo_return_false + CLEO_RegisterOpcode(0x2003, opcode_2003); // cleo_return_false } void CCustomOpcodeSystem::Inject(CCodeInjector& inj) @@ -3538,4 +3538,4 @@ extern "C" std::memcpy(buf, text.c_str(), text.length() + 1); // with terminator } -} \ No newline at end of file +} From 6265ebf4c4bf50a165e3d56d408dcb0710a9c640 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Wed, 15 Nov 2023 19:56:29 +0100 Subject: [PATCH 073/216] crash prevention in opcodes 0A8C and 0A8D (#42) --- source/CCustomOpcodeSystem.cpp | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 99f1a9cd..0693a3e7 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -1368,20 +1368,27 @@ namespace CLEO OpcodeResult __stdcall opcode_0A8C(CRunningScript *thread) { GetScriptParams(thread, 4); - void *Address = 
opcodeParams[0].pParam; + void *address = opcodeParams[0].pParam; DWORD size = opcodeParams[1].dwParam; DWORD value = opcodeParams[2].dwParam; bool vp = opcodeParams[3].bParam; + + if ((size_t)address <= CCustomOpcodeSystem::MinValidAddress) + { + SHOW_ERROR("Invalid '0x%X' pointer param of opcode [0A8C] in script %s\nScript suspended.", address, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } + switch (size) { default: - GetInstance().CodeInjector.MemoryWrite(Address, (BYTE)value, vp, size); + GetInstance().CodeInjector.MemoryWrite(address, (BYTE)value, vp, size); break; case 2: - GetInstance().CodeInjector.MemoryWrite(Address, (WORD)value, vp); + GetInstance().CodeInjector.MemoryWrite(address, (WORD)value, vp); break; case 4: - GetInstance().CodeInjector.MemoryWrite(Address, (DWORD)value, vp); + GetInstance().CodeInjector.MemoryWrite(address, (DWORD)value, vp); break; } return OR_CONTINUE; @@ -1391,26 +1398,31 @@ namespace CLEO OpcodeResult __stdcall opcode_0A8D(CRunningScript *thread) { GetScriptParams(thread, 3); - //DWORD value; - void *Address = opcodeParams[0].pParam; + void *address = opcodeParams[0].pParam; DWORD size = opcodeParams[1].dwParam; bool vp = opcodeParams[2].bParam; - opcodeParams[0].dwParam = 0; + if ((size_t)address <= CCustomOpcodeSystem::MinValidAddress) + { + SHOW_ERROR("Invalid '0x%X' pointer param of opcode [0A8D] in script %s\nScript suspended.", address, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } + opcodeParams[0].dwParam = 0; switch (size) { case 1: - GetInstance().CodeInjector.MemoryRead(Address, (BYTE)opcodeParams[0].ucParam, vp); + GetInstance().CodeInjector.MemoryRead(address, (BYTE)opcodeParams[0].ucParam, vp); break; case 2: - GetInstance().CodeInjector.MemoryRead(Address, (WORD)opcodeParams[0].usParam, vp); + GetInstance().CodeInjector.MemoryRead(address, (WORD)opcodeParams[0].usParam, vp); break; 
case 4: - GetInstance().CodeInjector.MemoryRead(Address, (DWORD)opcodeParams[0].dwParam, vp); + GetInstance().CodeInjector.MemoryRead(address, (DWORD)opcodeParams[0].dwParam, vp); break; default: - SHOW_ERROR("Invalid size param (%d) of opcode [0A8D] in script %s", size, ((CCustomScript*)thread)->GetInfoStr().c_str()); + SHOW_ERROR("Invalid size param '%d' of opcode [0A8D] in script %s\nScript suspended.", size, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); } SetScriptParams(thread, 1); From b527a7fbc5b414f973f758a19672117ce3383959 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Wed, 15 Nov 2023 22:32:49 +0100 Subject: [PATCH 074/216] Fix screen logging lines ending with space char (#41) --- cleo_plugins/DebugUtils/DebugUtils.ini | 6 +++--- cleo_plugins/DebugUtils/ScreenLog.h | 7 ++++++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/cleo_plugins/DebugUtils/DebugUtils.ini b/cleo_plugins/DebugUtils/DebugUtils.ini index 1353be88..df9726ca 100644 --- a/cleo_plugins/DebugUtils/DebugUtils.ini +++ b/cleo_plugins/DebugUtils/DebugUtils.ini @@ -5,6 +5,6 @@ LegacyDebugOpcodes=0 [ScreenLog] ; Level: 0 - off, 1 - errors and warnings, 2 - debug messages, 3 - all Level=2 -MessageTime=4000 -MessagesMax=35 -FontSize=55 +MessageTime=3000 +MessagesMax=45 +FontSize=40 diff --git a/cleo_plugins/DebugUtils/ScreenLog.h b/cleo_plugins/DebugUtils/ScreenLog.h index 149e9e7b..fa84fbb1 100644 --- a/cleo_plugins/DebugUtils/ScreenLog.h +++ b/cleo_plugins/DebugUtils/ScreenLog.h @@ -84,6 +84,11 @@ class ScreenLog this->msg.push_back(c); } } + + if(!this->msg.empty() && this->msg.back() == ' ') // a bug(?) 
in game prevents drawing texts ending with whitespace + { + this->msg.back() = '_'; // '_' is drawn as empty character too + } } ResetTime(); @@ -101,7 +106,7 @@ class ScreenLog void ResetTime() { - timeLeft = min(msg.length(), 200) * 0.08f; // 12 letters peer second reading speed + timeLeft = min(msg.length(), 200) * 0.06f; // 16 letters peer second reading speed timeLeft = max(timeLeft, 0.001f * ScreenLog::timeDisplay); // not shorter than defined in config } From 7d9960eecbc83d394baa1b3b8e1a73578bce9024 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Fri, 17 Nov 2023 04:08:17 +0100 Subject: [PATCH 075/216] Fix load script path (#45) --- source/CCustomOpcodeSystem.cpp | 4 ++-- source/CScriptEngine.cpp | 30 ++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 0693a3e7..52de740c 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -3474,7 +3474,7 @@ extern "C" } // if "label == 0" then "script_name" need to be the file name - auto cs = new CCustomScript(script_name, false, fromThread, label); + auto cs = new CCustomScript(filename.c_str(), false, fromThread, label); if (fromThread) SetScriptCondResult(fromThread, cs && cs->IsOK()); if (cs && cs->IsOK()) { @@ -3489,7 +3489,7 @@ extern "C" { if (cs) delete cs; if (fromThread) SkipUnusedVarArgs(fromThread); - LOG_WARNING(0, "Failed to load script '%s'.", script_name); + LOG_WARNING(0, "Failed to load script '%s'.", filename.c_str()); return nullptr; } diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 936d0818..8e711dbc 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -1319,6 +1319,36 @@ namespace CLEO FS::path path = szFileName; path = FS::weakly_canonical(path); + // file exists? + if (!FS::is_regular_file(path)) + { + if(path.extension() == cs_ext) + { + // maybe it was renamed to enable compatibility mode? 
+ auto compatPath = path; + + compatPath.replace_extension(cs4_ext); + if(FS::is_regular_file(compatPath)) + { + path = compatPath; + } + else + { + compatPath.replace_extension(cs3_ext); + if (FS::is_regular_file(compatPath)) + { + path = compatPath; + } + else + { + throw std::logic_error("File does not exists"); + } + } + } + else + throw std::logic_error("File does not exists"); + } + // deduce compatibility mode from filetype extension if (path.extension() == cs4_ext) CompatVer = CLEO_VER_4; From 989bda51a1d1dd41e0d87198e2537f2d8212b1a3 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Fri, 17 Nov 2023 21:31:24 +0100 Subject: [PATCH 076/216] fix 0AB1 support of string array input arguments (#47) --- cleo_sdk/CLEO.h | 22 +++++++++++----------- source/CCustomOpcodeSystem.cpp | 14 +++++++++++--- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index 5a02ca02..fb92f0fe 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -49,26 +49,26 @@ enum eGameVersion : int // operand types enum eDataType : int { - DT_END, - DT_DWORD, // imm32 + DT_END, // variable args end marker + DT_DWORD, // literal int 32 DT_VAR, // globalVar $ DT_LVAR, // localVar @ - DT_BYTE, // imm8 - DT_WORD, // imm16 - DT_FLOAT, // imm32f + DT_BYTE, // literal int 8 + DT_WORD, // literal int 16 + DT_FLOAT, // literal float 32 DT_VAR_ARRAY, // globalArr $(,) DT_LVAR_ARRAY, // localArr @(,) - DT_TEXTLABEL, // sstring '' + DT_TEXTLABEL, // literal sstring '' DT_VAR_TEXTLABEL, // globalVarSString s$ DT_LVAR_TEXTLABEL, // localVarSString @s - DT_VAR_TEXTLABEL_ARRAY, - DT_LVAR_TEXTLABEL_ARRAY, - DT_VARLEN_STRING, // vstring "" + DT_VAR_TEXTLABEL_ARRAY, // globalVarSStringArr s$(,) + DT_LVAR_TEXTLABEL_ARRAY, // localVarSStringArr @s(,) + DT_VARLEN_STRING, // literal vstring "" DT_STRING, DT_VAR_STRING, // globalVarVString v$ DT_LVAR_STRING, // localVarVString @v - DT_VAR_STRING_ARRAY, - DT_LVAR_STRING_ARRAY + DT_VAR_STRING_ARRAY, // globalVarStringArr v$(,) + 
DT_LVAR_STRING_ARRAY // localVarStringArr @v(,) }; static const char* ToStr(eDataType type) { diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 52de740c..86780fe3 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -2244,7 +2244,6 @@ namespace CLEO if (nParams > 32) { SHOW_ERROR("Argument count %d is out of supported range (32) of opcode [0AB1] in script %s", nParams, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); } } @@ -2258,8 +2257,9 @@ namespace CLEO for (DWORD i = 0; i < min(nParams, 32); i++) { SCRIPT_VAR* arg = arguments + i; - - switch (*thread->GetBytePointer()) + + auto paramType = (eDataType)*thread->GetBytePointer(); + switch (paramType) { case DT_DWORD: case DT_WORD: @@ -2279,6 +2279,10 @@ namespace CLEO case DT_LVAR_STRING: case DT_VAR_TEXTLABEL: case DT_LVAR_TEXTLABEL: + case DT_VAR_TEXTLABEL_ARRAY: + case DT_LVAR_TEXTLABEL_ARRAY: + case DT_VAR_STRING_ARRAY: + case DT_LVAR_STRING_ARRAY: arg->pParam = GetScriptParamPointer(thread); if (arg->pParam >= locals && arg->pParam < localsEnd) // correct scoped variable's pointer { @@ -2293,6 +2297,10 @@ namespace CLEO scmFunc->stringParams.emplace_back(ReadStringParam(thread)); // those texts exists in script code, but without terminator character. 
Copy is necessary arg->pcParam = (char*)scmFunc->stringParams.back().c_str(); break; + + default: + SHOW_ERROR("Invalid argument type '0x%02X' in opcode [0AB1] in script %s\nScript suspended.", paramType, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); } } From 7558fafa05a265458185bda0e3f7ddd46e502a71 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sat, 18 Nov 2023 15:54:03 +0100 Subject: [PATCH 077/216] Fixed crash when creating script from label (#49) --- source/CScriptEngine.cpp | 218 +++++++++++++++++++++------------------ 1 file changed, 117 insertions(+), 101 deletions(-) diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 8e711dbc..d9cea80e 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -1301,128 +1301,144 @@ namespace CLEO }); } - // TODO: Consider split into 2 classes: CCustomExternalScript, CCustomChildScript + // TODO: Consider split into 2 classes: CCustomExternalScript, CCustomChildScript CCustomScript::CCustomScript(const char *szFileName, bool bIsMiss, CRunningScript *parent, int label) : CRunningScript(), bSaveEnabled(false), bOK(false), LastSearchPed(0), LastSearchCar(0), LastSearchObj(0), CompatVer(CLEO_VER_CUR) { + TRACE("Loading custom script %s...", szFileName); + bIsCustom = true; bIsMission = bUseMissionCleanup = bIsMiss; UseTextCommands = 0; NumDraws = 0; NumTexts = 0; - TRACE("Loading custom script %s...", szFileName); + try + { + std::ifstream is; + if (label != 0) // Create external from label. + { + if (!parent) + throw std::logic_error("Trying to create external thread from label without parent thread"); - // store script file directory and name - FS::path path = szFileName; - path = FS::weakly_canonical(path); + if (!parent->IsCustom()) + throw std::logic_error("Only custom threads can spawn children threads from label"); - // file exists? 
- if (!FS::is_regular_file(path)) - { - if(path.extension() == cs_ext) + auto cs = (CCustomScript*)parent; + + CompatVer = cs->GetCompatibility(); + bDebugMode = cs->GetDebugMode(); + scriptFileDir = cs->GetScriptFileDir(); + scriptFileName = cs->GetScriptFileName(); + workDir = cs->GetWorkDir(); + + BaseIP = cs->GetBasePointer(); + CurrentIP = cs->GetBasePointer() - label; + memcpy(Name, cs->Name, sizeof(Name)); + dwChecksum = cs->dwChecksum; + parentThread = cs; + cs->childThreads.push_back(this); + } + else { - // maybe it was renamed to enable compatibility mode? - auto compatPath = path; + // store script file directory and name + FS::path path = szFileName; + path = FS::weakly_canonical(path); - compatPath.replace_extension(cs4_ext); - if(FS::is_regular_file(compatPath)) - { - path = compatPath; - } - else + // file exists? + if (!FS::is_regular_file(path)) { - compatPath.replace_extension(cs3_ext); - if (FS::is_regular_file(compatPath)) + if (path.extension() == cs_ext) { - path = compatPath; + // maybe it was renamed to enable compatibility mode? 
+ auto compatPath = path; + + compatPath.replace_extension(cs4_ext); + if (FS::is_regular_file(compatPath)) + { + path = compatPath; + } + else + { + compatPath.replace_extension(cs3_ext); + if (FS::is_regular_file(compatPath)) + { + path = compatPath; + } + else + { + throw std::logic_error("File does not exists"); + } + } } else - { throw std::logic_error("File does not exists"); - } } - } - else - throw std::logic_error("File does not exists"); - } - // deduce compatibility mode from filetype extension - if (path.extension() == cs4_ext) - CompatVer = CLEO_VER_4; - else - if (path.extension() == cs3_ext) - CompatVer = CLEO_VER_3; + // deduce compatibility mode from filetype extension + if (path.extension() == cs4_ext) + CompatVer = CLEO_VER_4; + else + if (path.extension() == cs3_ext) + CompatVer = CLEO_VER_3; - if(CompatVer == CLEO_VER_CUR && parent != nullptr && parent-IsCustom()) - { - // inherit compatibility mode from parent - CompatVer = ((CCustomScript*)parent)->GetCompatibility(); - - // try loading file with same compatibility mode filetype extension - auto compatPath = path; - if(CompatVer == CLEO_VER_4) - { - compatPath.replace_extension(cs4_ext); - if(FS::is_regular_file(compatPath)) - path = compatPath; - } - else - if (CompatVer == CLEO_VER_3) - { - compatPath.replace_extension(cs3_ext); - if (FS::is_regular_file(compatPath)) - path = compatPath; - } - } + if (CompatVer == CLEO_VER_CUR && parent != nullptr && parent - IsCustom()) + { + // inherit compatibility mode from parent + CompatVer = ((CCustomScript*)parent)->GetCompatibility(); - scriptFileDir = path.parent_path().string(); - scriptFileName = path.filename().string(); - workDir = Filepath_Root; // game root + // try loading file with same compatibility mode filetype extension + auto compatPath = path; + if (CompatVer == CLEO_VER_4) + { + compatPath.replace_extension(cs4_ext); + if (FS::is_regular_file(compatPath)) + path = compatPath; + } + else + if (CompatVer == CLEO_VER_3) + { + 
compatPath.replace_extension(cs3_ext); + if (FS::is_regular_file(compatPath)) + path = compatPath; + } + } - try - { - std::ifstream is; - if (label != 0) // Create external from label. - { - if (!parent) - throw std::logic_error("Trying to create external thread from label without parent thread"); + scriptFileDir = path.parent_path().string(); + scriptFileName = path.filename().string(); - if (!parent->IsCustom()) - throw std::logic_error("Only custom threads can spawn children threads from label"); + if(parent != nullptr) + { + bDebugMode = ((CCustomScript*)parent)->GetDebugMode(); + workDir = ((CCustomScript*)parent)->GetWorkDir(); + } + else + { + bDebugMode = GetInstance().ScriptEngine.NativeScriptsDebugMode; // global setting + workDir = Filepath_Root; // game root + } - auto cs = (CCustomScript*)parent; + using std::ios; + std::ifstream is(path.string().c_str(), std::ios::binary); + is.exceptions(std::ios::badbit | std::ios::failbit); + std::size_t length; + is.seekg(0, std::ios::end); + length = (size_t)is.tellg(); + is.seekg(0, std::ios::beg); - BaseIP = cs->GetBasePointer(); - CurrentIP = cs->GetBasePointer() - label; - memcpy(Name, cs->Name, sizeof(Name)); - dwChecksum = cs->dwChecksum; - parentThread = cs; - cs->childThreads.push_back(this); - } - else - { - using std::ios; - std::ifstream is(path.string().c_str(), std::ios::binary); - is.exceptions(std::ios::badbit | std::ios::failbit); - std::size_t length; - is.seekg(0, std::ios::end); - length = (size_t)is.tellg(); - is.seekg(0, std::ios::beg); - - if (bIsMiss) - { - if (*MissionLoaded) - throw std::logic_error("Starting of custom mission when other mission loaded"); - *MissionLoaded = 1; - BaseIP = CurrentIP = missionBlock; - } - else { - BaseIP = CurrentIP = new BYTE[length]; - } - is.read(reinterpret_cast(BaseIP), length); + if (bIsMiss) + { + if (*MissionLoaded) + throw std::logic_error("Starting of custom mission when other mission loaded"); + *MissionLoaded = 1; + BaseIP = CurrentIP = 
missionBlock; + } + else { + BaseIP = CurrentIP = new BYTE[length]; + } + is.read(reinterpret_cast(BaseIP), length); dwChecksum = crc32(reinterpret_cast(BaseIP), length); @@ -1440,26 +1456,26 @@ namespace CLEO auto len = min(fName.length(), sizeof(Name) - 1); // and text terminator memcpy(Name, fName.c_str(), len); } - } - lastScriptCreated = this; + } + lastScriptCreated = this; bOK = true; } catch (std::exception& e) { - LOG_WARNING(0, "Error during loading of custom script %s occured.\nError message: %s", path.string().c_str(), e.what()); + LOG_WARNING(0, "Error during loading of custom script %s occured.\nError message: %s", szFileName, e.what()); } catch (...) { - LOG_WARNING(0, "Unknown error during loading of custom script %s occured.", path.string().c_str()); + LOG_WARNING(0, "Unknown error during loading of custom script %s occured.", szFileName); } } CCustomScript::~CCustomScript() { if (BaseIP && !bIsMission) delete[] BaseIP; - RunScriptDeleteDelegate(reinterpret_cast(this)); - if (lastScriptCreated == this) lastScriptCreated = nullptr; + RunScriptDeleteDelegate(reinterpret_cast(this)); + if (lastScriptCreated == this) lastScriptCreated = nullptr; } - float VectorSqrMagnitude(CVector vector) { return vector.x * vector.x + vector.y * vector.y + vector.z * vector.z; } + float VectorSqrMagnitude(CVector vector) { return vector.x * vector.x + vector.y * vector.y + vector.z * vector.z; } } From 5035fdac9d3d75a0108658ff1f913813334116c1 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sun, 19 Nov 2023 21:43:42 +0100 Subject: [PATCH 078/216] Fixed 0AB1 input argument count verification for string arrays. (#50) * Fixed 0AB1 input argument count verification for string arrays. Enabled using string array argument as module name in 0AB1. * Added function comments. * Updated parameter type checks. 
--- cleo_sdk/CLEO.h | 118 ++++++-- source/CCustomOpcodeSystem.cpp | 527 ++++++++++++--------------------- 2 files changed, 270 insertions(+), 375 deletions(-) diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index fb92f0fe..1b912e19 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -97,42 +97,96 @@ static const char* ToStr(eDataType type) default: return "corrupted"; } } +static bool IsImmInteger(eDataType type) // immediate/literal integer in code like 42 +{ + switch (type) + { + case DT_BYTE: + case DT_WORD: + case DT_DWORD: + return true; + } + return false; +} +static bool IsImmFloat(eDataType type) // immediate/literal float in code like 42.0 +{ + return type == DT_FLOAT; +} +static bool IsImmString(eDataType type) // immediate/literal string in code like "text" +{ + switch (type) + { + case DT_STRING: + case DT_TEXTLABEL: + case DT_VARLEN_STRING: + return true; + } + return false; +} +static bool IsVarString(eDataType type) // string variable +{ + switch (type) + { + case DT_LVAR_TEXTLABEL: + case DT_LVAR_TEXTLABEL_ARRAY: + case DT_LVAR_STRING: + case DT_LVAR_STRING_ARRAY: + case DT_VAR_TEXTLABEL: + case DT_VAR_TEXTLABEL_ARRAY: + case DT_VAR_STRING: + case DT_VAR_STRING_ARRAY: + return true; + } + return false; +} +static bool IsVariable(eDataType type) // can carry int, float, pointer to text +{ + switch (type) + { + case DT_VAR: + case DT_VAR_ARRAY: + case DT_LVAR: + case DT_LVAR_ARRAY: + return true; + } + return false; +} static const char* ToKindStr(eDataType type) { switch (type) { - case DT_BYTE: - case DT_WORD: - case DT_DWORD: - return "int"; break; - - case DT_FLOAT: - return "float"; break; - - case DT_STRING: - case DT_TEXTLABEL: - case DT_LVAR_TEXTLABEL: - case DT_LVAR_TEXTLABEL_ARRAY: - case DT_LVAR_STRING: - case DT_LVAR_STRING_ARRAY: - case DT_VAR_TEXTLABEL: - case DT_VAR_TEXTLABEL_ARRAY: - case DT_VAR_STRING: - case DT_VAR_STRING_ARRAY: - case DT_VARLEN_STRING: - return "string"; break; - - case DT_VAR: - case DT_VAR_ARRAY: - case DT_LVAR: 
- case DT_LVAR_ARRAY: - return "variable"; break; - - case DT_END: - return "varArgEnd"; break; - - default: - return "corrupted"; break; + case DT_BYTE: + case DT_WORD: + case DT_DWORD: + return "int"; break; + + case DT_FLOAT: + return "float"; break; + + case DT_STRING: + case DT_TEXTLABEL: + case DT_LVAR_TEXTLABEL: + case DT_LVAR_TEXTLABEL_ARRAY: + case DT_LVAR_STRING: + case DT_LVAR_STRING_ARRAY: + case DT_VAR_TEXTLABEL: + case DT_VAR_TEXTLABEL_ARRAY: + case DT_VAR_STRING: + case DT_VAR_STRING_ARRAY: + case DT_VARLEN_STRING: + return "string"; break; + + case DT_VAR: + case DT_VAR_ARRAY: + case DT_LVAR: + case DT_LVAR_ARRAY: + return "variable"; break; + + case DT_END: + return "varArgEnd"; break; + + default: + return "corrupted"; break; } } diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 86780fe3..e322b26d 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -492,20 +492,9 @@ namespace CLEO inline CRunningScript& operator>>(CRunningScript& thread, DWORD& uval) { auto paramType = (eDataType)*thread.GetBytePointer(); - switch(paramType) + if (!IsImmInteger(paramType) && !IsVariable(paramType)) // TODO: it is possible to differentiate between int/float arrays { - // integers - case DT_BYTE: - case DT_WORD: - case DT_DWORD: - case DT_LVAR: - case DT_LVAR_ARRAY: - case DT_VAR: - case DT_VAR_ARRAY: - break; - - default: - LOG_WARNING(&thread, "Reading integer argument, got %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); + LOG_WARNING(&thread, "Reading integer argument, got %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); } GetScriptParams(&thread, 1); @@ -516,20 +505,9 @@ namespace CLEO inline CRunningScript& operator<<(CRunningScript& thread, DWORD uval) { auto paramType = (eDataType)*thread.GetBytePointer(); - switch (paramType) + if (!IsVariable(paramType)) // TODO: it is possible to differentiate between int/float 
arrays { - // integers - /*case DT_BYTE: - case DT_WORD: - case DT_DWORD:*/ - case DT_LVAR: - case DT_LVAR_ARRAY: - case DT_VAR: - case DT_VAR_ARRAY: - break; - - default: - LOG_WARNING(&thread, "Writing integer, got argument type %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); + LOG_WARNING(&thread, "Writing integer, got argument type %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); } opcodeParams[0].dwParam = uval; @@ -540,20 +518,9 @@ namespace CLEO inline CRunningScript& operator>>(CRunningScript& thread, int& nval) { auto paramType = (eDataType)*thread.GetBytePointer(); - switch (paramType) + if (!IsImmInteger(paramType) && !IsVariable(paramType)) // TODO: it is possible to differentiate between int/float arrays { - // integers - case DT_BYTE: - case DT_WORD: - case DT_DWORD: - case DT_LVAR: - case DT_LVAR_ARRAY: - case DT_VAR: - case DT_VAR_ARRAY: - break; - - default: - LOG_WARNING(&thread, "Reading integer argument, got %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); + LOG_WARNING(&thread, "Reading integer argument, got %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); } GetScriptParams(&thread, 1); @@ -564,20 +531,9 @@ namespace CLEO inline CRunningScript& operator<<(CRunningScript& thread, int nval) { auto paramType = (eDataType)*thread.GetBytePointer(); - switch (paramType) + if (!IsVariable(paramType)) // TODO: it is possible to differentiate between int/float arrays { - // integers - /*case DT_BYTE: - case DT_WORD: - case DT_DWORD:*/ - case DT_LVAR: - case DT_LVAR_ARRAY: - case DT_VAR: - case DT_VAR_ARRAY: - break; - - default: - LOG_WARNING(&thread, "Writing integer, got argument type %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); + LOG_WARNING(&thread, "Writing integer, got argument type %s in script %s", ToKindStr(paramType), 
((CCustomScript*)&thread)->GetInfoStr().c_str()); } opcodeParams[0].nParam = nval; @@ -588,17 +544,9 @@ namespace CLEO inline CRunningScript& operator>>(CRunningScript& thread, float& fval) { auto paramType = (eDataType)*thread.GetBytePointer(); - switch (paramType) + if (!IsImmFloat(paramType) && !IsVariable(paramType)) // TODO: it is possible to differentiate between int/float arrays { - case DT_FLOAT: - case DT_LVAR: - case DT_LVAR_ARRAY: - case DT_VAR: - case DT_VAR_ARRAY: - break; - - default: - LOG_WARNING(&thread, "Reading float argument, got %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); + LOG_WARNING(&thread, "Reading float argument, got %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); } GetScriptParams(&thread, 1); @@ -609,16 +557,9 @@ namespace CLEO inline CRunningScript& operator<<(CRunningScript& thread, float fval) { auto paramType = (eDataType)*thread.GetBytePointer(); - switch (paramType) + if (!IsVariable(paramType)) // TODO: it is possible to differentiate between int/float arrays { - case DT_LVAR: - case DT_LVAR_ARRAY: - case DT_VAR: - case DT_VAR_ARRAY: - break; - - default: - LOG_WARNING(&thread, "Writing float, got argument type %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); + LOG_WARNING(&thread, "Writing float, got argument type %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); } opcodeParams[0].fParam = fval; @@ -685,86 +626,62 @@ namespace CLEO lastErrorMsg.clear(); auto paramType = CLEO_GetOperandType(thread); - switch(paramType) + if (IsImmInteger(paramType) || IsVariable(paramType)) // TODO: it is possible to differentiate between int/float arrays { - // address of string buffer - case DT_DWORD: - case DT_VAR: - case DT_LVAR: - case DT_VAR_ARRAY: - case DT_LVAR_ARRAY: - { - GetScriptParams(thread, 1); - - if(opcodeParams[0].dwParam <= CCustomOpcodeSystem::MinValidAddress) 
- { - lastErrorMsg = (opcodeParams[0].dwParam == 0) ? - "Reading string from 'null' pointer argument" : - stringPrintf("Reading string from invalid '0x%X' pointer argument", opcodeParams[0].dwParam); + GetScriptParams(thread, 1); - return nullptr; // error, target buffer untouched - } - - char* str = opcodeParams[0].pcParam; - auto length = strlen(str); + if (opcodeParams[0].dwParam <= CCustomOpcodeSystem::MinValidAddress) + { + lastErrorMsg = (opcodeParams[0].dwParam == 0) ? + "Reading string from 'null' pointer argument" : + stringPrintf("Reading string from invalid '0x%X' pointer argument", opcodeParams[0].dwParam); - if(length > bufLength) - { - lastErrorMsg = stringPrintf("Target buffer too small (%d) to read whole string (%d) from argument", bufLength, length); - length = bufLength; // clamp to target buffer size - } + return nullptr; // error, target buffer untouched + } - if (length) strncpy(buf, str, length); + char* str = opcodeParams[0].pcParam; + auto length = strlen(str); - if (bufSize > 0) buf[length] = '\0'; // string terminator - return buf; + if (length > bufLength) + { + lastErrorMsg = stringPrintf("Target buffer too small (%d) to read whole string (%d) from argument", bufLength, length); + length = bufLength; // clamp to target buffer size } - // short string variable - case DT_VAR_TEXTLABEL: - case DT_LVAR_TEXTLABEL: - case DT_VAR_TEXTLABEL_ARRAY: - case DT_LVAR_TEXTLABEL_ARRAY: + if (length) strncpy(buf, str, length); - // long string variable - case DT_VAR_STRING: - case DT_LVAR_STRING: - case DT_VAR_STRING_ARRAY: - case DT_LVAR_STRING_ARRAY: - - // in-code string - case DT_TEXTLABEL: // sstring '' - case DT_STRING: - case DT_VARLEN_STRING: + if (bufSize > 0) buf[length] = '\0'; // string terminator + return buf; + } + else + if(IsImmString(paramType) || IsVarString(paramType)) + { + if (paramType == DT_VARLEN_STRING) { - if (paramType == DT_VARLEN_STRING) - { - // prococess here as GetScriptStringParam can not obtain strings with lenght 
greater than 128 - thread->IncPtr(1); // already processed paramType - - DWORD length = (BYTE)*thread->GetBytePointer(); // as unsigned byte! - thread->IncPtr(1); // length info + // prococess here as GetScriptStringParam can not obtain strings with lenght greater than 128 + thread->IncPtr(1); // already processed paramType - char* str = (char*)thread->GetBytePointer(); - thread->IncPtr(length); // text data + DWORD length = (BYTE)*thread->GetBytePointer(); // as unsigned byte! + thread->IncPtr(1); // length info - if (length > bufLength) - { - lastErrorMsg = stringPrintf("Target buffer too small (%d) to read whole string (%d) from argument", bufLength, length); - length = bufLength; // clamp to target buffer size - } + char* str = (char*)thread->GetBytePointer(); + thread->IncPtr(length); // text data - if (length) strncpy(buf, str, length); - if (bufSize > 0) buf[length] = '\0'; // string terminator - } - else + if (length > bufLength) { - GetScriptStringParam(thread, buf, (BYTE)min(bufSize, 0xFF)); // standard game's function + lastErrorMsg = stringPrintf("Target buffer too small (%d) to read whole string (%d) from argument", bufLength, length); + length = bufLength; // clamp to target buffer size } - return buf; + if (length) strncpy(buf, str, length); + if (bufSize > 0) buf[length] = '\0'; // string terminator } - break; + else + { + GetScriptStringParam(thread, buf, (BYTE)min(bufSize, 0xFF)); // standard game's function + } + + return buf; } // unsupported param type @@ -797,44 +714,42 @@ namespace CLEO lastErrorMsg.clear(); auto paramType = CLEO_GetOperandType(thread); - switch(paramType) + if (IsImmInteger(paramType) || IsVariable(paramType)) { // address to output buffer - case DT_DWORD: - case DT_VAR: - case DT_LVAR: - case DT_VAR_ARRAY: - case DT_LVAR_ARRAY: - GetScriptParams(thread, 1); + GetScriptParams(thread, 1); - if (opcodeParams[0].dwParam <= CCustomOpcodeSystem::MinValidAddress) - { - lastErrorMsg = stringPrintf("Writing string into invalid 
'0x%X' pointer argument", opcodeParams[0].dwParam); - return { nullptr, 0 }; // error - } - return { opcodeParams[0].pcParam, 0x7FFFFFFF }; // user allocated memory block can be any size - - // short string variable - case DT_VAR_TEXTLABEL: - case DT_LVAR_TEXTLABEL: - case DT_VAR_TEXTLABEL_ARRAY: - case DT_LVAR_TEXTLABEL_ARRAY: - return { (char*)GetScriptParamPointer(thread), 8 }; - - // long string variable - case DT_VAR_STRING: - case DT_LVAR_STRING: - case DT_VAR_STRING_ARRAY: - case DT_LVAR_STRING_ARRAY: - return { (char*)GetScriptParamPointer(thread), 16 }; - - default: + if (opcodeParams[0].dwParam <= CCustomOpcodeSystem::MinValidAddress) { - lastErrorMsg = stringPrintf("Writing string, got argument %s", ToKindStr(paramType)); - CLEO_SkipOpcodeParams(thread, 1); // skip unhandled param + lastErrorMsg = stringPrintf("Writing string into invalid '0x%X' pointer argument", opcodeParams[0].dwParam); return { nullptr, 0 }; // error } + return { opcodeParams[0].pcParam, 0x7FFFFFFF }; // user allocated memory block can be any size } + else + if (IsVarString(paramType)) + { + switch(paramType) + { + // short string variable + case DT_VAR_TEXTLABEL: + case DT_LVAR_TEXTLABEL: + case DT_VAR_TEXTLABEL_ARRAY: + case DT_LVAR_TEXTLABEL_ARRAY: + return { (char*)GetScriptParamPointer(thread), 8 }; + + // long string variable + case DT_VAR_STRING: + case DT_LVAR_STRING: + case DT_VAR_STRING_ARRAY: + case DT_LVAR_STRING_ARRAY: + return { (char*)GetScriptParamPointer(thread), 16 }; + } + } + + lastErrorMsg = stringPrintf("Writing string, got argument %s", ToKindStr(paramType)); + CLEO_SkipOpcodeParams(thread, 1); // skip unhandled param + return { nullptr, 0 }; // error } // perform 'sprintf'-operation for parameters, passed through SCM @@ -1790,29 +1705,24 @@ namespace CLEO SCRIPT_VAR *arguments_end = arguments + numParams; // retrieve parameters - for (SCRIPT_VAR *arg = arguments; arg != arguments_end; ++arg) + for (SCRIPT_VAR* arg = arguments; arg != arguments_end; ++arg) { - 
switch (*thread->GetBytePointer()) - { - case DT_FLOAT: - case DT_DWORD: - case DT_WORD: - case DT_BYTE: - case DT_VAR: - case DT_LVAR: - case DT_VAR_ARRAY: - case DT_LVAR_ARRAY: + auto paramType = (eDataType)*thread->GetBytePointer(); + if (IsImmInteger(paramType) || IsVariable(paramType)) *thread >> arg->dwParam; - break; - case DT_VAR_STRING: - case DT_LVAR_STRING: - case DT_VAR_TEXTLABEL: - case DT_LVAR_TEXTLABEL: - arg->pParam = GetScriptParamPointer(thread); - break; - case DT_VARLEN_STRING: - case DT_TEXTLABEL: + else + if (IsImmFloat(paramType)) + *thread >> arg->fParam; + else + if (IsImmString(paramType)) (*arg).pcParam = ReadStringParam(thread, textParams[currTextParam++], MAX_STR_LEN); + else + if (IsVarString(paramType)) + arg->pParam = GetScriptParamPointer(thread); // TODO: should use ReadStringParam too to ensure it is null terminated? + else + { + SHOW_ERROR("Invalid param type (%s) in opcode [0AA5] in script %s \nScript suspended.", ToKindStr(paramType), ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); } } @@ -1858,29 +1768,24 @@ namespace CLEO SCRIPT_VAR *arguments_end = arguments + numParams; // retrieve parameters - for (SCRIPT_VAR *arg = arguments; arg != arguments_end; ++arg) + for (SCRIPT_VAR* arg = arguments; arg != arguments_end; ++arg) { - switch (*thread->GetBytePointer()) - { - case DT_FLOAT: - case DT_DWORD: - case DT_WORD: - case DT_BYTE: - case DT_VAR: - case DT_LVAR: - case DT_VAR_ARRAY: - case DT_LVAR_ARRAY: + auto paramType = (eDataType)*thread->GetBytePointer(); + if (IsImmInteger(paramType) || IsVariable(paramType)) *thread >> arg->dwParam; - break; - case DT_VAR_STRING: - case DT_LVAR_STRING: - case DT_VAR_TEXTLABEL: - case DT_LVAR_TEXTLABEL: - arg->pParam = GetScriptParamPointer(thread); - break; - case DT_VARLEN_STRING: - case DT_TEXTLABEL: - arg->pcParam = ReadStringParam(thread, textParams[currTextParam++], MAX_STR_LEN); + else + if (IsImmFloat(paramType)) + *thread 
>> arg->fParam; + else + if (IsImmString(paramType)) + (*arg).pcParam = ReadStringParam(thread, textParams[currTextParam++], MAX_STR_LEN); + else + if (IsVarString(paramType)) + arg->pParam = GetScriptParamPointer(thread); // TODO: should use ReadStringParam too to ensure it is null terminated? + else + { + SHOW_ERROR("Invalid param type (%s) in opcode [0AA6] in script %s \nScript suspended.", ToKindStr(paramType), ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); } } @@ -1925,35 +1830,24 @@ namespace CLEO SCRIPT_VAR * arguments_end = arguments + numParams; // retrieve parameters - for (SCRIPT_VAR *arg = arguments; arg != arguments_end; ++arg) + for (SCRIPT_VAR* arg = arguments; arg != arguments_end; ++arg) { - switch (*thread->GetBytePointer()) - { - case DT_DWORD: - case DT_WORD: - case DT_BYTE: - case DT_VAR: - case DT_LVAR: - case DT_VAR_ARRAY: - case DT_LVAR_ARRAY: + auto paramType = (eDataType)*thread->GetBytePointer(); + if (IsImmInteger(paramType) || IsVariable(paramType)) *thread >> arg->dwParam; - break; - - case DT_FLOAT: + else + if (IsImmFloat(paramType)) *thread >> arg->fParam; - break; - - case DT_VAR_STRING: - case DT_LVAR_STRING: - case DT_VAR_TEXTLABEL: - case DT_LVAR_TEXTLABEL: - arg->pParam = GetScriptParamPointer(thread); - break; - - case DT_VARLEN_STRING: - case DT_TEXTLABEL: - arg->pcParam = ReadStringParam(thread, textParams[currTextParam++], MAX_STR_LEN); - break; + else + if (IsImmString(paramType)) + (*arg).pcParam = ReadStringParam(thread, textParams[currTextParam++], MAX_STR_LEN); + else + if (IsVarString(paramType)) + arg->pParam = GetScriptParamPointer(thread); // TODO: should use ReadStringParam too to ensure it is null terminated? 
+ else + { + SHOW_ERROR("Invalid param type (%s) in opcode [0AA7] in script %s \nScript suspended.", ToKindStr(paramType), ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); } } @@ -2002,34 +1896,24 @@ namespace CLEO SCRIPT_VAR *arguments_end = arguments + numParams; // retrieve parameters - for (SCRIPT_VAR *arg = arguments; arg != arguments_end; ++arg) + for (SCRIPT_VAR* arg = arguments; arg != arguments_end; ++arg) { - switch (*thread->GetBytePointer()) - { - case DT_DWORD: - case DT_WORD: - case DT_BYTE: - case DT_VAR: - case DT_LVAR: - case DT_VAR_ARRAY: - case DT_LVAR_ARRAY: + auto paramType = (eDataType)*thread->GetBytePointer(); + if (IsImmInteger(paramType) || IsVariable(paramType)) *thread >> arg->dwParam; - break; - - case DT_FLOAT: + else + if (IsImmFloat(paramType)) *thread >> arg->fParam; - break; - - case DT_VAR_STRING: - case DT_LVAR_STRING: - case DT_VAR_TEXTLABEL: - case DT_LVAR_TEXTLABEL: - arg->pParam = GetScriptParamPointer(thread); - break; - - case DT_VARLEN_STRING: - case DT_TEXTLABEL: - arg->pcParam = ReadStringParam(thread, textParams[currTextParam++], MAX_STR_LEN); + else + if (IsImmString(paramType)) + (*arg).pcParam = ReadStringParam(thread, textParams[currTextParam++], MAX_STR_LEN); + else + if (IsVarString(paramType)) + arg->pParam = GetScriptParamPointer(thread); // TODO: should use ReadStringParam too to ensure it is null terminated? 
+ else + { + SHOW_ERROR("Invalid param type (%s) in opcode [0AA8] in script %s \nScript suspended.", ToKindStr(paramType), ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); } } @@ -2146,38 +2030,20 @@ namespace CLEO char* moduleTxt = nullptr; auto paramType = (eDataType)*thread->GetBytePointer(); - switch (paramType) + if (IsImmInteger(paramType) || IsVariable(paramType)) { - // label of current script - case DT_DWORD: - case DT_WORD: - case DT_BYTE: - case DT_VAR: - case DT_LVAR: - case DT_VAR_ARRAY: - case DT_LVAR_ARRAY: - *thread >> label; - break; - - // string with module and export name - case DT_VAR_STRING: - case DT_LVAR_STRING: - case DT_VAR_TEXTLABEL: - case DT_LVAR_TEXTLABEL: - moduleTxt = GetScriptParamPointer(thread)->pcParam; - break; - - case DT_STRING: - case DT_TEXTLABEL: - case DT_VARLEN_STRING: - moduleTxt = ReadStringParam(thread); - break; - - default: - SHOW_ERROR("Invalid type (%s) of the 'input param count' argument in opcode [0AB1] in script %s \nScript suspended.", ToKindStr(paramType), ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); + *thread >> label; // label offset } - + else if (IsImmString(paramType) || IsVarString(paramType)) + { + moduleTxt = ReadStringParam(thread); // string with module and export name + } + else + { + SHOW_ERROR("Invalid type (%s) of the 'input param count' argument in opcode [0AB1] in script %s \nScript suspended.", ToKindStr(paramType), ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } + ScmFunction* scmFunc = new ScmFunction(thread); // parse module reference text @@ -2213,24 +2079,19 @@ namespace CLEO } // "number of input parameters" opcode argument - DWORD nParams; + DWORD nParams = 0; paramType = (eDataType)*thread->GetBytePointer(); - switch (paramType) + if (paramType != DT_END) { - case DT_END: - nParams = 0; - break; - - // 
literal integers - case DT_BYTE: - case DT_WORD: - case DT_DWORD: + if (IsImmInteger(paramType)) + { *thread >> nParams; - break; - - default: + } + else + { SHOW_ERROR("Invalid type of first argument in opcode [0AB1], in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } } if (nParams) { @@ -2257,48 +2118,32 @@ namespace CLEO for (DWORD i = 0; i < min(nParams, 32); i++) { SCRIPT_VAR* arg = arguments + i; - + auto paramType = (eDataType)*thread->GetBytePointer(); - switch (paramType) + if (IsImmInteger(paramType) || IsVariable(paramType)) { - case DT_DWORD: - case DT_WORD: - case DT_BYTE: - case DT_VAR: - case DT_LVAR: - case DT_VAR_ARRAY: - case DT_LVAR_ARRAY: *thread >> arg->dwParam; - break; - - case DT_FLOAT: + } + else if(paramType == DT_FLOAT) + { *thread >> arg->fParam; - break; - - case DT_VAR_STRING: - case DT_LVAR_STRING: - case DT_VAR_TEXTLABEL: - case DT_LVAR_TEXTLABEL: - case DT_VAR_TEXTLABEL_ARRAY: - case DT_LVAR_TEXTLABEL_ARRAY: - case DT_VAR_STRING_ARRAY: - case DT_LVAR_STRING_ARRAY: + } + else if (IsVarString(paramType)) + { arg->pParam = GetScriptParamPointer(thread); if (arg->pParam >= locals && arg->pParam < localsEnd) // correct scoped variable's pointer { arg->dwParam -= (DWORD)locals; arg->dwParam += (DWORD)storedLocals; } - break; - - case DT_STRING: - case DT_TEXTLABEL: - case DT_VARLEN_STRING: - scmFunc->stringParams.emplace_back(ReadStringParam(thread)); // those texts exists in script code, but without terminator character. Copy is necessary + } + else if (IsImmString(paramType)) // those texts exists in script code, but without terminator character. 
Copy is necessary + { + scmFunc->stringParams.emplace_back(ReadStringParam(thread)); arg->pcParam = (char*)scmFunc->stringParams.back().c_str(); - break; - - default: + } + else + { SHOW_ERROR("Invalid argument type '0x%02X' in opcode [0AB1] in script %s\nScript suspended.", paramType, ((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } @@ -2337,22 +2182,13 @@ namespace CLEO DWORD returnParamCount = GetVarArgCount(thread); if (returnParamCount) { - DWORD declaredParamCount; - auto paramType = (eDataType)*thread->GetBytePointer(); - switch (paramType) + if (!IsImmInteger(paramType)) { - // literal integers - case DT_BYTE: - case DT_WORD: - case DT_DWORD: - *thread >> declaredParamCount; - break; - - default: SHOW_ERROR("Invalid type of first argument in opcode [0AB2], in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } + DWORD declaredParamCount; *thread >> declaredParamCount; if(returnParamCount - 1 < declaredParamCount) // minus 'num args' itself { @@ -3397,9 +3233,14 @@ extern "C" break; case DT_VAR_ARRAY: case DT_LVAR_ARRAY: + case DT_VAR_TEXTLABEL_ARRAY: + case DT_LVAR_TEXTLABEL_ARRAY: + case DT_VAR_STRING_ARRAY: + case DT_LVAR_STRING_ARRAY: thread->IncPtr(6); break; case DT_BYTE: + //case DT_END: // should be only skipped with var args dediacated functions thread->IncPtr(); break; case DT_WORD: From 9fa9b5077fc92f483a6492f6a4c4d257521f71ac Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sun, 3 Dec 2023 22:35:06 +0100 Subject: [PATCH 079/216] Implemented proper handling of game start-end events. 
(#48) --- cleo_sdk/CLEO.h | 9 +- source/CCodeInjector.h | 4 +- source/CGameVersionManager.cpp | 4 + source/CGameVersionManager.h | 4 + source/CScriptEngine.cpp | 273 ++++++++++----------------------- source/CScriptEngine.h | 12 +- source/CTextManager.cpp | 39 +++-- source/CTextManager.h | 4 + source/CleoBase.cpp | 122 +++++++++++++-- source/CleoBase.h | 30 +++- 10 files changed, 272 insertions(+), 229 deletions(-) diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index 1b912e19..b6c37259 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -246,9 +246,8 @@ const char DIR_MODULES[] = "modules:"; // game\cleo\modules directory // argument of CLEO_RegisterCallback enum class eCallbackId : DWORD { - ScmInit1, // void WINAPI OnScmInit1(); - ScmInit2, // void WINAPI OnScmInit2(); - ScmInit3, // void WINAPI OnScmInit3(); + GameBegin, // void WINAPI OnGameBegin(DWORD saveSlot); // -1 if not started from save + GameEnd, // void WINAPI OnGameEnd(); ScriptsLoaded, // void WINAPI OnScriptsLoaded(); ScriptsFinalize, // void WINAPI OnScriptsFinalize(); ScriptRegister, // void WINAPI OnScriptRegister(CRunningScript* pScript); // called after script creation @@ -266,8 +265,8 @@ enum class eLogLevel : DWORD { None, Error, // errors and warnings - Debug, // debug mode related - Default // all + Debug, // debug mode / user traces + Default // all log messages }; typedef int SCRIPT_HANDLE; diff --git a/source/CCodeInjector.h b/source/CCodeInjector.h index 1dc7ee40..8f0eccb2 100644 --- a/source/CCodeInjector.h +++ b/source/CCodeInjector.h @@ -51,10 +51,10 @@ namespace CLEO void CloseReadWriteAccess(); template - void ReplaceFunction(T *funcPtr, memory_pointer Position) + void ReplaceFunction(T *funcPtr, memory_pointer Position, T** origFuncPtr = nullptr) { TRACE("Replacing call: 0x%08X", (DWORD)Position); - MemCall((size_t)Position, (size_t)funcPtr); // *whistle* + MemCall((size_t)Position, (size_t)funcPtr, (size_t*)origFuncPtr); // *whistle* } template diff --git 
a/source/CGameVersionManager.cpp b/source/CGameVersionManager.cpp index 64ed0ae6..1c48e62f 100644 --- a/source/CGameVersionManager.cpp +++ b/source/CGameVersionManager.cpp @@ -82,6 +82,10 @@ namespace CLEO { 0x00A44B68, memory_und, 0x00A44B68, 0x00A471E8, 0x00AB9C98 }, // MA_NUM_SCRIPT_TEXTS { 0x0058FCE4, memory_und, 0x0058FCE4, 0x005904B4, 0x0059E73C }, // MA_CALL_DRAW_SCRIPT_TEXTS_BEFORE_FADE { 0x0058D552, memory_und, 0x0058D552, 0x0058DD22, 0x0059BAD4 }, // MA_CALL_DRAW_SCRIPT_TEXTS_AFTER_FADE + { 0x00748E6B, memory_und, memory_und, memory_und, memory_und }, // MA_CALL_GAME_SHUTDOWN TODO: find for other versions + { 0x0053C758, memory_und, memory_und, memory_und, memory_und }, // MA_CALL_GAME_RESTART_1 TODO: find for other versions + { 0x00748E04, memory_und, memory_und, memory_und, memory_und }, // MA_CALL_GAME_RESTART_2 TODO: find for other versions + { 0x00748E3E, memory_und, memory_und, memory_und, memory_und }, // MA_CALL_GAME_RESTART_3 TODO: find for other versions // GV_US10, GV_US11, GV_EU10, GV_EU11, GV_STEAM { 0x008A6168, memory_und, 0x008A6168, 0x008A7450, 0x00913C20 }, // MA_OPCODE_HANDLER, diff --git a/source/CGameVersionManager.h b/source/CGameVersionManager.h index de6bbadd..ed73ed6e 100644 --- a/source/CGameVersionManager.h +++ b/source/CGameVersionManager.h @@ -98,6 +98,10 @@ namespace CLEO MA_NUM_SCRIPT_TEXTS, MA_CALL_DRAW_SCRIPT_TEXTS_BEFORE_FADE, MA_CALL_DRAW_SCRIPT_TEXTS_AFTER_FADE, + MA_CALL_GAME_SHUTDOWN, + MA_CALL_GAME_RESTART_1, + MA_CALL_GAME_RESTART_2, + MA_CALL_GAME_RESTART_3, // CustomOpcodeSystem MA_OPCODE_HANDLER, diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index d9cea80e..01aba42c 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -184,67 +184,6 @@ namespace CLEO CRunningScript **inactiveThreadQueue, **activeThreadQueue; CCustomScript *lastScriptCreated = nullptr; - // called to initialise the scripts (after the main.scm has actually had a chance to set up) - void OnInitScm1(void) - { - 
TRACE("Scripts initialized"); - GetInstance().ScriptEngine.RemoveAllCustomScripts(); - InitScm(); - GetInstance().TextManager.ClearDynamicFxts(); - GetInstance().OpcodeSystem.FinalizeScriptObjects(); - GetInstance().SoundSystem.UnloadAllStreams(); - - GetInstance().ScriptEngine.Initialize(); - GetInstance().ModuleSystem.Clear(); - //GetInstance().ModuleSystem.LoadCleoModules(); // TODO: enbale if cleo_modules approved - GetInstance().ScriptEngine.LoadCustomScripts(false); - - for (void* func : GetInstance().GetCallbacks(eCallbackId::ScmInit1)) - { - typedef void WINAPI callback(void); - ((callback*)func)(); - } - } - - // called on first load before the others - void OnInitScm2(void) - { - TRACE("Scripts exclusively initialized"); - GetInstance().ScriptEngine.RemoveAllCustomScripts(); - InitScm(); - GetInstance().TextManager.ClearDynamicFxts(); - GetInstance().OpcodeSystem.FinalizeScriptObjects(); - GetInstance().SoundSystem.UnloadAllStreams(); - - GetInstance().ScriptEngine.Initialize(); - GetInstance().ScriptEngine.LoadCustomScripts(); - - for (void* func : GetInstance().GetCallbacks(eCallbackId::ScmInit2)) - { - typedef void WINAPI callback(void); - ((callback*)func)(); - } - } - - // called to load the scripts - void OnInitScm3(void) - { - TRACE("Scripts loaded"); - GetInstance().ScriptEngine.RemoveAllCustomScripts(); - InitScm(); - GetInstance().TextManager.ClearDynamicFxts(); - GetInstance().OpcodeSystem.FinalizeScriptObjects(); - GetInstance().SoundSystem.UnloadAllStreams(); - - GetInstance().ScriptEngine.Initialize(); - GetInstance().ScriptEngine.LoadCustomScripts(true); - - for (void* func : GetInstance().GetCallbacks(eCallbackId::ScmInit3)) - { - typedef void WINAPI callback(void); - ((callback*)func)(); - } - } extern "C" void __stdcall opcode_004E(CCustomScript *pScript) { @@ -282,60 +221,15 @@ namespace CLEO } } - void OnNewGame(void) - { - static struct CGangWeapons { - BYTE _f0; - BYTE _f1; // - - DWORD weapon1; - DWORD weapon2; - DWORD weapon3; - } 
*gangWeapons((CGangWeapons *)0xC0B870); // 1.01 eu specific - TRACE("New game started"); - gangWeapons[0].weapon1 = 22; - gangWeapons[0].weapon2 = 28; - gangWeapons[0].weapon3 = 0; - - gangWeapons[1].weapon1 = 22; - gangWeapons[1].weapon2 = 0; - gangWeapons[1].weapon3 = 0; - - gangWeapons[2].weapon1 = 22; - gangWeapons[2].weapon2 = 0; - gangWeapons[2].weapon3 = 0; - - gangWeapons[4].weapon1 = 24; - gangWeapons[4].weapon2 = 28; - gangWeapons[4].weapon3 = 0; - - gangWeapons[5].weapon1 = 24; - gangWeapons[5].weapon2 = 0; - gangWeapons[5].weapon3 = 0; - - gangWeapons[6].weapon1 = 22; - gangWeapons[6].weapon2 = 30; - gangWeapons[6].weapon3 = 0; - - gangWeapons[7].weapon1 = 22; - gangWeapons[7].weapon2 = 28; - gangWeapons[7].weapon3 = 0; - - GetInstance().TextManager.ClearDynamicFxts(); - GetInstance().OpcodeSystem.FinalizeScriptObjects(); - GetInstance().ScriptEngine.RemoveAllCustomScripts(); - GetInstance().SoundSystem.UnloadAllStreams(); - GetInstance().ScriptEngine.LoadCustomScripts(); - } - void OnLoadScmData(void) { - TRACE(__FUNCSIG__); + TRACE("Loading scripts save data..."); LoadScmData(); } void OnSaveScmData(void) { - TRACE(__FUNCSIG__); + TRACE("Saving scripts save data..."); GetInstance().ScriptEngine.SaveState(); GetInstance().ScriptEngine.UnregisterAllScripts(); SaveScmData(); @@ -419,6 +313,8 @@ namespace CLEO void __fastcall HOOK_ProcessScript(CCustomScript * pScript, int) { + GetInstance().ScriptEngine.GameBegin(); // all initialized and ready to process scripts + // run registered callbacks bool process = true; for (void* func : GetInstance().GetCallbacks(eCallbackId::ScriptProcess)) @@ -858,7 +754,6 @@ namespace CLEO GetScriptStringParam = reinterpret_cast(_GetScriptStringParam); GetScriptParamPointer2 = reinterpret_cast(_GetScriptParamPointer2); - InitScm = gvm.TranslateMemoryAddress(MA_INIT_SCM_FUNCTION); SaveScmData = gvm.TranslateMemoryAddress(MA_SAVE_SCM_DATA_FUNCTION); LoadScmData = gvm.TranslateMemoryAddress(MA_LOAD_SCM_DATA_FUNCTION); @@ 
-892,18 +787,6 @@ namespace CLEO activeThreadQueue = gvm.TranslateMemoryAddress(MA_ACTIVE_THREAD_QUEUE); staticThreads = gvm.TranslateMemoryAddress(MA_STATIC_THREADS); - if (gvm.GetGameVersion() == GV_EU11) - { - inj.ReplaceFunction(OnInitScm3, gvm.TranslateMemoryAddress(MA_CALL_INIT_SCM3)); - inj.InjectFunction(OnNewGame, 0x5DEEA0); // GV_EU11 specific - } - else - { - inj.ReplaceFunction(OnInitScm1, gvm.TranslateMemoryAddress(MA_CALL_INIT_SCM1)); - inj.ReplaceFunction(OnInitScm2, gvm.TranslateMemoryAddress(MA_CALL_INIT_SCM2)); - inj.ReplaceFunction(OnInitScm3, gvm.TranslateMemoryAddress(MA_CALL_INIT_SCM3)); - } - inj.ReplaceFunction(OnLoadScmData, gvm.TranslateMemoryAddress(MA_CALL_LOAD_SCM_DATA)); inj.ReplaceFunction(OnSaveScmData, gvm.TranslateMemoryAddress(MA_CALL_SAVE_SCM_DATA)); inj.InjectFunction(&opcode_004E_hook, gvm.TranslateMemoryAddress(MA_OPCODE_004E)); @@ -916,8 +799,7 @@ namespace CLEO CScriptEngine::~CScriptEngine() { - TRACE("Unloading scripts..."); - RemoveAllCustomScripts(); + GameEnd(); } CleoSafeHeader safe_header; @@ -926,8 +808,12 @@ namespace CLEO std::unique_ptr safe_info_utilizer; std::unique_ptr stopped_info_utilizer; - void CScriptEngine::Initialize() + void CScriptEngine::GameBegin() { + if(gameInProgress) return; // already started + if(activeThreadQueue == nullptr || activeThreadQueue[0] == nullptr) return; // main gamescript not loaded yet + gameInProgress = true; + if (CGame::bMissionPackGame == 0) // regular main game { MainScriptFileDir = FS::path(Filepath_Cleo).append("data\\script").string(); @@ -941,66 +827,26 @@ namespace CLEO NativeScriptsDebugMode = GetPrivateProfileInt("General", "DebugMode", 0, Filepath_Config.c_str()) != 0; MainScriptCurWorkDir = Filepath_Root; + + GetInstance().ModuleSystem.LoadCleoModules(); + LoadState(GetInstance().saveSlot); + LoadCustomScripts(); } - void CScriptEngine::LoadCustomScripts(bool load_mode) + void CScriptEngine::GameEnd() { - // steam offset is different, so get it manually for now - 
CGameVersionManager& gvm = GetInstance().VersionManager; - int nSlot = gvm.GetGameVersion() != GV_STEAM ? *(BYTE*)&MenuManager->m_nSelectedSaveGame : *((BYTE*)MenuManager + 0x15B); - - auto saveFile = FS::path(Filepath_Cleo).append(stringPrintf("cleo_saves\\cs%d.sav", nSlot)).string(); - - safe_info = nullptr; - stopped_info = nullptr; - safe_header.n_saved_threads = safe_header.n_stopped_threads = 0; + if (!gameInProgress) return; + gameInProgress = false; - if (load_mode) - { - // load cleo saving file - try - { - TRACE("Loading cleo safe %s", saveFile.c_str()); - std::ifstream ss(saveFile.c_str(), std::ios::binary); - if (ss.is_open()) - { - ss.exceptions(std::ios::eofbit | std::ios::badbit | std::ios::failbit); - ReadBinary(ss, safe_header); - if (safe_header.signature != CleoSafeHeader::sign) - throw std::runtime_error("Invalid file format"); - safe_info = new ThreadSavingInfo[safe_header.n_saved_threads]; - safe_info_utilizer.reset(safe_info); - stopped_info = new unsigned long[safe_header.n_stopped_threads]; - stopped_info_utilizer.reset(stopped_info); - ReadBinary(ss, CleoVariables, 0x400); - ReadBinary(ss, safe_info, safe_header.n_saved_threads); - ReadBinary(ss, stopped_info, safe_header.n_stopped_threads); - for (size_t i = 0; i < safe_header.n_stopped_threads; ++i) - InactiveScriptHashes.insert(stopped_info[i]); - TRACE("Finished. 
Loaded %u cleo variables, %u saved threads info, %u stopped threads info", - 0x400, safe_header.n_saved_threads, safe_header.n_stopped_threads); - } - else - { - memset(CleoVariables, 0, sizeof(CleoVariables)); - } - } - catch (std::exception& ex) - { - TRACE("Loading of cleo safe %s failed: %s", saveFile.c_str(), ex.what()); - safe_header.n_saved_threads = safe_header.n_stopped_threads = 0; - memset(CleoVariables, 0, sizeof(CleoVariables)); - } - } - else - { - memset(CleoVariables, 0, sizeof(CleoVariables)); - } + RemoveAllCustomScripts(); + GetInstance().ModuleSystem.Clear(); + memset(CleoVariables, 0, sizeof(CleoVariables)); + } + void CScriptEngine::LoadCustomScripts() + { TRACE("Searching for CLEO scripts"); - std::string scriptsDir = "cleo"; // TODO: use Filepath_Cleo instead ModLoader is updated to support CLEO5 - - FilesWalk(scriptsDir.c_str(), cs_ext, [&](const char* fullPath, const char* filename) + FilesWalk(Filepath_Cleo.c_str(), cs_ext, [&](const char* fullPath, const char* filename) { if (auto cs = LoadScript(fullPath)) { @@ -1008,7 +854,7 @@ namespace CLEO } }); - FilesWalk(scriptsDir.c_str(), cs4_ext, [&](const char* fullPath, const char* filename) + FilesWalk(Filepath_Cleo.c_str(), cs4_ext, [&](const char* fullPath, const char* filename) { if (auto cs = LoadScript(fullPath)) { @@ -1017,7 +863,7 @@ namespace CLEO } }); - FilesWalk(scriptsDir.c_str(), cs3_ext, [&](const char* fullPath, const char* filename) + FilesWalk(Filepath_Cleo.c_str(), cs3_ext, [&](const char* fullPath, const char* filename) { if (auto cs = LoadScript(fullPath)) { @@ -1081,6 +927,54 @@ namespace CLEO return cs; } + void CScriptEngine::LoadState(int saveSlot) + { + memset(CleoVariables, 0, sizeof(CleoVariables)); + + if(saveSlot == -1) return; + + auto saveFile = FS::path(Filepath_Cleo).append(stringPrintf("cleo_saves\\cs%d.sav", saveSlot)).string(); + + safe_info = nullptr; + stopped_info = nullptr; + safe_header.n_saved_threads = safe_header.n_stopped_threads = 0; + + // load 
cleo saving file + try + { + TRACE("Loading cleo safe %s", saveFile.c_str()); + std::ifstream ss(saveFile.c_str(), std::ios::binary); + if (ss.is_open()) + { + ss.exceptions(std::ios::eofbit | std::ios::badbit | std::ios::failbit); + ReadBinary(ss, safe_header); + if (safe_header.signature != CleoSafeHeader::sign) + throw std::runtime_error("Invalid file format"); + safe_info = new ThreadSavingInfo[safe_header.n_saved_threads]; + safe_info_utilizer.reset(safe_info); + stopped_info = new unsigned long[safe_header.n_stopped_threads]; + stopped_info_utilizer.reset(stopped_info); + ReadBinary(ss, CleoVariables, 0x400); + ReadBinary(ss, safe_info, safe_header.n_saved_threads); + ReadBinary(ss, stopped_info, safe_header.n_stopped_threads); + for (size_t i = 0; i < safe_header.n_stopped_threads; ++i) + InactiveScriptHashes.insert(stopped_info[i]); + TRACE("Finished. Loaded %u cleo variables, %u saved threads info, %u stopped threads info", + 0x400, safe_header.n_saved_threads, safe_header.n_stopped_threads); + } + else + { + memset(CleoVariables, 0, sizeof(CleoVariables)); + } + } + catch (std::exception& ex) + { + TRACE("Loading of cleo safe %s failed: %s", saveFile.c_str(), ex.what()); + safe_header.n_saved_threads = safe_header.n_stopped_threads = 0; + memset(CleoVariables, 0, sizeof(CleoVariables)); + } + } + void CScriptEngine::SaveState() { try @@ -1255,23 +1149,20 @@ namespace CLEO void CScriptEngine::RemoveAllCustomScripts(void) { + TRACE("Unloading scripts..."); + InactiveScriptHashes.clear(); - std::for_each(CustomScripts.begin(), CustomScripts.end(), [this](CCustomScript *cs) { - TRACE("Unregistering custom script named %s", cs->GetName().c_str()); - RemoveScriptFromQueue(cs, activeThreadQueue); - //AddScriptToQueue(cs, inactiveThreadQueue); - //if(cs->GetPrev()) cs->GetPrev()->SetNext(nullptr); - //if(cs->GetNext()) cs->GetNext()->SetPrev(nullptr); - //TRACE("Psyke!!"); - cs->SetActive(false); - delete cs; - }); + + UnregisterAllScripts(); 
CustomScripts.clear(); - std::for_each(ScriptsWaitingForDelete.begin(), ScriptsWaitingForDelete.end(), [this](CCustomScript *cs) { + + std::for_each(ScriptsWaitingForDelete.begin(), ScriptsWaitingForDelete.end(), [this](CCustomScript *cs) + { TRACE("Deleting inactive script named %s", cs->GetName().c_str()); delete cs; }); ScriptsWaitingForDelete.clear(); + if (CustomMission) { TRACE("Unregistering custom mission named %s", CustomMission->GetName().c_str()); diff --git a/source/CScriptEngine.h b/source/CScriptEngine.h index ea4e5369..dcf18703 100644 --- a/source/CScriptEngine.h +++ b/source/CScriptEngine.h @@ -68,6 +68,7 @@ namespace CLEO void StoreScriptDraws(); void RestoreScriptDraws(); + void LoadCleoSave(int slot); void StoreScriptCustoms(); void RestoreScriptCustoms(); @@ -96,6 +97,8 @@ namespace CLEO class CScriptEngine : VInjectible { + bool gameInProgress = false; + friend class CCustomScript; std::list CustomScripts; std::list ScriptsWaitingForDelete; @@ -115,9 +118,14 @@ namespace CLEO ~CScriptEngine(); virtual void Inject(CCodeInjector&); - void Initialize(); // call after new game started - void LoadCustomScripts(bool bMode = false); + void GameBegin(); // call after new game started + void GameEnd(); + + void LoadCustomScripts(); + + // CLEO saves + void LoadState(int saveSlot); void SaveState(); CRunningScript* FindScriptNamed(const char *); diff --git a/source/CTextManager.cpp b/source/CTextManager.cpp index 8517c008..e14cdc02 100644 --- a/source/CTextManager.cpp +++ b/source/CTextManager.cpp @@ -103,21 +103,6 @@ namespace CLEO CTextManager::CTextManager() : fxts(1, crc32FromUpcaseStdString) { - // parse FXT files - auto path = FS::path(Filepath_Cleo).append("cleo_text").string(); - FilesWalk(path.c_str(), ".fxt", [this](const char* fullPath, const char* filename) - { - TRACE("Parsing FXT file %s", fullPath); - try - { - std::ifstream stream(fullPath); - ParseFxtFile(stream); - } - catch (std::exception& ex) - { - LOG_WARNING(0, "Loading of FXT file 
'%s' failed: \n%s", fullPath, ex.what()); - } - }); } const char* CTextManager::Get(const char* key) @@ -200,6 +185,30 @@ namespace CLEO // TRACE("Deleting finished, %d elements erased", count); } + void CTextManager::LoadFxts() + { + // load whole FXT files directory + auto path = FS::path(Filepath_Cleo).append("cleo_text").string(); + FilesWalk(path.c_str(), ".fxt", [this](const char* fullPath, const char* filename) + { + TRACE("Parsing FXT file %s", fullPath); + try + { + std::ifstream stream(fullPath); + ParseFxtFile(stream); + } + catch (std::exception& ex) + { + LOG_WARNING(0, "Loading of FXT file '%s' failed: \n%s", fullPath, ex.what()); + } + }); + } + + void CTextManager::Clear() + { + fxts.clear(); + } + void CTextManager::Inject(CCodeInjector& inj) { TRACE("Injecting TextManager..."); diff --git a/source/CTextManager.h b/source/CTextManager.h index fe41286c..bd86cd90 100644 --- a/source/CTextManager.h +++ b/source/CTextManager.h @@ -25,6 +25,10 @@ namespace CLEO public: CTextManager(); ~CTextManager(); + + void LoadFxts(); + void Clear(); + const char* Get(const char* key); bool AddFxt(const char *key, const char *value, bool dynamic = true); bool RemoveFxt(const char *key); diff --git a/source/CleoBase.cpp b/source/CleoBase.cpp index 2ef3bb71..83e96063 100644 --- a/source/CleoBase.cpp +++ b/source/CleoBase.cpp @@ -1,7 +1,6 @@ #include "stdafx.h" #include "CleoBase.h" - namespace CLEO { CCleoInstance CleoInstance; @@ -26,15 +25,63 @@ namespace CLEO _asm jmp dwFunc } + void CCleoInstance::OnScmInit1() + { + auto& base = GetInstance(); + base.ScmInit1_Orig(); // call original + base.GameBegin(); + } + + void CCleoInstance::OnScmInit2() // load save + { + auto& base = GetInstance(); + base.ScmInit2_Orig(); // call original + base.GameBegin(); + } + + void CCleoInstance::OnScmInit3() + { + auto& base = GetInstance(); + base.ScmInit3_Orig(); // call original + base.GameBegin(); + } + + void __declspec(naked) CCleoInstance::OnGameShutdown() + { + 
GetInstance().GameEnd(); + static DWORD oriFunc; + oriFunc = (DWORD)(GetInstance().GameShutdown); + _asm jmp oriFunc + } + + void __declspec(naked) CCleoInstance::OnGameRestart1() + { + GetInstance().GameEnd(); + static DWORD oriFunc; + oriFunc = (DWORD)(GetInstance().GameRestart1); + _asm jmp oriFunc + } + + void __declspec(naked) CCleoInstance::OnGameRestart2() + { + GetInstance().GameEnd(); + static DWORD oriFunc; + oriFunc = (DWORD)(GetInstance().GameRestart2); + _asm jmp oriFunc + } + + void __declspec(naked) CCleoInstance::OnGameRestart3() + { + GetInstance().GameEnd(); + static DWORD oriFunc; + oriFunc = (DWORD)(GetInstance().GameRestart3); + _asm jmp oriFunc + } + void CCleoInstance::Start() { if (m_bStarted) return; // already started - - /*if (FS::current_path() != Filepath_Root) - { - MessageBox(NULL, "CLEO.asi has to be placed in game's root directory!", "CLEO error", MB_SYSTEMMODAL | MB_TOPMOST | MB_ICONERROR | MB_OK); - exit(1); // terminate the game - }*/ + m_bStarted = true; FS::create_directory(Filepath_Cleo); FS::create_directory(FS::path(Filepath_Cleo).append("cleo_modules")); @@ -45,24 +92,75 @@ namespace CLEO CodeInjector.OpenReadWriteAccess(); // must do this earlier to ensure plugins write access on init GameMenu.Inject(CodeInjector); DmaFix.Inject(CodeInjector); - UpdateGameLogics = VersionManager.TranslateMemoryAddress(MA_UPDATE_GAME_LOGICS_FUNCTION); - CodeInjector.ReplaceFunction(&OnUpdateGameLogics, VersionManager.TranslateMemoryAddress(MA_CALL_UPDATE_GAME_LOGICS)); TextManager.Inject(CodeInjector); SoundSystem.Inject(CodeInjector); OpcodeSystem.Inject(CodeInjector); ScriptEngine.Inject(CodeInjector); + CodeInjector.ReplaceFunction(&OnUpdateGameLogics, VersionManager.TranslateMemoryAddress(MA_CALL_UPDATE_GAME_LOGICS), &UpdateGameLogics); + + CodeInjector.ReplaceFunction(OnScmInit1, VersionManager.TranslateMemoryAddress(MA_CALL_INIT_SCM1), &ScmInit1_Orig); + CodeInjector.ReplaceFunction(OnScmInit2, 
VersionManager.TranslateMemoryAddress(MA_CALL_INIT_SCM2), &ScmInit2_Orig); + CodeInjector.ReplaceFunction(OnScmInit3, VersionManager.TranslateMemoryAddress(MA_CALL_INIT_SCM3), &ScmInit3_Orig); + + CodeInjector.ReplaceFunction(OnGameShutdown, VersionManager.TranslateMemoryAddress(MA_CALL_GAME_SHUTDOWN), &GameShutdown); + + CodeInjector.ReplaceFunction(OnGameRestart1, VersionManager.TranslateMemoryAddress(MA_CALL_GAME_RESTART_1), &GameRestart1); + CodeInjector.ReplaceFunction(OnGameRestart2, VersionManager.TranslateMemoryAddress(MA_CALL_GAME_RESTART_2), &GameRestart2); + CodeInjector.ReplaceFunction(OnGameRestart3, VersionManager.TranslateMemoryAddress(MA_CALL_GAME_RESTART_3), &GameRestart3); + CodeInjector.ReplaceFunction(OnDrawingFinished, 0x00734640); // nullsub_63 - originally something like renderDebugStuff? - m_bStarted = true; TRACE("CLEO instance started successfully!"); } void CCleoInstance::Stop() { if (!m_bStarted) return; - m_bStarted = false; + + ScriptEngine.GameEnd(); + } + + void CCleoInstance::GameBegin() + { + if (m_bGameInProgress) return; + m_bGameInProgress = true; + + saveSlot = MenuManager->m_bWantToLoad ? 
MenuManager->m_nSelectedSaveGame : -1; + + TRACE("Starting new game, save slot: %d", saveSlot); + + // execute registered callbacks + for (void* func : GetInstance().GetCallbacks(eCallbackId::GameBegin)) + { + typedef void WINAPI callback(DWORD); + ((callback*)func)((DWORD)saveSlot); + } + + TextManager.LoadFxts(); + } + + void CCleoInstance::GameEnd() + { + if (!m_bGameInProgress) return; + m_bGameInProgress = false; + + TRACE("Ending current game"); + + // execute registered callbacks + for (void* func : GetInstance().GetCallbacks(eCallbackId::GameEnd)) + { + typedef void WINAPI callback(void); + ((callback*)func)(); + } + + ScriptEngine.GameEnd(); + OpcodeSystem.FinalizeScriptObjects(); + SoundSystem.UnloadAllStreams(); + TextManager.Clear(); + + saveSlot = -1; } void CCleoInstance::AddCallback(eCallbackId id, void* func) @@ -82,7 +180,7 @@ namespace CLEO void __cdecl CCleoInstance::OnDrawingFinished() { - // execute callbacks + // execute registered callbacks for (void* func : GetInstance().GetCallbacks(eCallbackId::DrawingFinished)) { typedef void WINAPI callback(void); diff --git a/source/CleoBase.h b/source/CleoBase.h index e54181c4..b449b577 100644 --- a/source/CleoBase.h +++ b/source/CleoBase.h @@ -19,6 +19,7 @@ namespace CLEO class CCleoInstance { bool m_bStarted; + bool m_bGameInProgress; std::map> m_callbacks; public: @@ -34,13 +35,18 @@ namespace CLEO CPluginSystem PluginSystem; //CLegacy Legacy; - HWND MainWnd; + HWND MainWnd = NULL; + int saveSlot = -1; // -1 if not loaded from save + CCleoInstance(); virtual ~CCleoInstance(); void Start(); void Stop(); + void GameBegin(); + void GameEnd(); + bool IsStarted() const { return m_bStarted; } void AddCallback(eCallbackId id, void* func); @@ -48,8 +54,28 @@ namespace CLEO static void __cdecl OnDrawingFinished(); - void(__cdecl * UpdateGameLogics)(); + void(__cdecl * UpdateGameLogics)() = nullptr; static void __cdecl OnUpdateGameLogics(); + + // calls to CTheScripts::Init + void(__cdecl* ScmInit1_Orig)() = 
nullptr; + void(__cdecl* ScmInit2_Orig)() = nullptr; + void(__cdecl* ScmInit3_Orig)() = nullptr; + static void OnScmInit1(); + static void OnScmInit2(); + static void OnScmInit3(); + + // call for Game::Shutdown + void(__cdecl* GameShutdown)() = nullptr; + static void __cdecl OnGameShutdown(); + + // calls for Game::ShutDownForRestart + void(__cdecl* GameRestart1)() = nullptr; + void(__cdecl* GameRestart2)() = nullptr; + void(__cdecl* GameRestart3)() = nullptr; + static void __cdecl OnGameRestart1(); + static void __cdecl OnGameRestart2(); + static void __cdecl OnGameRestart3(); }; CCleoInstance& GetInstance(); From 44b761b5052e3c73f1131adafd84fbd6ab3cf2e0 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Wed, 20 Dec 2023 15:04:52 +0100 Subject: [PATCH 080/216] new return opcodes (#44) * Stack validation in cleo_return opcodes ScmFunction moved to separate files cleo_return_with renamed to cleo_return_true cleo_return_fallse now returns parameters new opcode cleo_return_none * Return opcodes updated. 
--- CHANGELOG.md | 2 +- CLEO5.vcxproj | 2 + CLEO5.vcxproj.filters | 6 + source/CCustomOpcodeSystem.cpp | 235 ++++++++++----------------------- source/CCustomOpcodeSystem.h | 2 +- source/ScmFunction.cpp | 119 +++++++++++++++++ source/ScmFunction.h | 39 ++++++ 7 files changed, 240 insertions(+), 165 deletions(-) create mode 100644 source/ScmFunction.cpp create mode 100644 source/ScmFunction.h diff --git a/CHANGELOG.md b/CHANGELOG.md index efb4f683..f489763c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ - **2000 ([resolve_filepath](https://library.sannybuilder.com/#/sa/CLEO/2000))** - **2001 ([get_script_filename](https://library.sannybuilder.com/#/sa/CLEO/2001))** - **2002 ([cleo_return_with](https://library.sannybuilder.com/#/sa/CLEO/2002))** - - **2003 ([cleo_return_false](https://library.sannybuilder.com/#/sa/CLEO/2003))** + - **2003 ([cleo_return_fail](https://library.sannybuilder.com/#/sa/CLEO/2003))** - 'argument count' parameter of **0AB1 (cleo_call)** is now optional. `cleo_call @LABEL args 0` can be written as `cleo_call @LABEL` - 'argument count' parameter of **0AB2 (cleo_return)** is now optional. 
`cleo_return 0` can be written as `cleo_return` - opcodes **0AAB**, **0AE4**, **0AE5**, **0AE6**, **0AE7** and **0AE8** moved to the [FileSystemOperations](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/FileSystemOperations) plugin diff --git a/CLEO5.vcxproj b/CLEO5.vcxproj index c82564ea..850780ea 100644 --- a/CLEO5.vcxproj +++ b/CLEO5.vcxproj @@ -53,6 +53,7 @@ + Create Create @@ -77,6 +78,7 @@ + diff --git a/CLEO5.vcxproj.filters b/CLEO5.vcxproj.filters index dc072f49..79d21bd5 100644 --- a/CLEO5.vcxproj.filters +++ b/CLEO5.vcxproj.filters @@ -99,6 +99,9 @@ source\utils + + source\extensions + @@ -161,6 +164,9 @@ source\utils + + source\extensions + diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index e322b26d..482dba5b 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -3,6 +3,7 @@ #include "CLegacy.h" #include "CGameVersionManager.h" #include "CCustomOpcodeSystem.h" +#include "ScmFunction.h" #include "CTextManager.h" #include "CModelInfo.h" @@ -127,7 +128,7 @@ namespace CLEO OpcodeResult __stdcall opcode_2000(CRunningScript* thread); // resolve_filepath OpcodeResult __stdcall opcode_2001(CRunningScript* thread); // get_script_filename OpcodeResult __stdcall opcode_2002(CRunningScript* thread); // cleo_return_with - OpcodeResult __stdcall opcode_2003(CRunningScript* thread); // cleo_return_false + OpcodeResult __stdcall opcode_2003(CRunningScript* thread); // cleo_return_fail typedef void(*FuncScriptDeleteDelegateT) (CRunningScript *script); struct ScriptDeleteDelegate { @@ -263,6 +264,44 @@ namespace CLEO return (callbackResult != OR_NONE) ? callbackResult : result; } + OpcodeResult CCustomOpcodeSystem::CleoReturnGeneric(WORD opcode, CRunningScript* thread, bool returnArgs) + { + auto cs = reinterpret_cast(thread); + + ScmFunction* scmFunc = ScmFunction::Get(cs->GetScmFunction()); + if (scmFunc == nullptr) + { + SHOW_ERROR("Invalid Cleo Call reference. 
[%04X] possibly used without preceding [0AB1] in script %s\nScript suspended.", opcode, cs->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } + + DWORD returnParamCount = 0; + if(returnArgs) + { + returnParamCount = GetVarArgCount(cs); + if (returnParamCount) GetScriptParams(cs, returnParamCount); + } + + scmFunc->Return(cs); // jump back to cleo_call, right after last input param. Return slot var args starts here + if (scmFunc->moduleExportRef != nullptr) GetInstance().ModuleSystem.ReleaseModuleRef((char*)scmFunc->moduleExportRef); // exiting export - release module + delete scmFunc; + + if (returnArgs) + { + DWORD returnSlotCount = GetVarArgCount(cs); + if (returnParamCount != returnSlotCount) // new CLEO5 opcode, strict error checks + { + SHOW_ERROR("Opcode [%04X] returned %d params, while function caller expected %d in script %s\nScript suspended.", opcode, returnParamCount, returnSlotCount, cs->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(cs); + } + + if (returnSlotCount) SetScriptParams(cs, returnSlotCount); + cs->IncPtr(); // skip var args + } + + return OR_CONTINUE; + } + OpcodeResult CCustomOpcodeSystem::ErrorSuspendScript(CRunningScript* thread) { //thread->SetActive(false): // will crash game if no active script left @@ -295,7 +334,7 @@ namespace CLEO m_hNativeLibs.clear(); // clean up after opcode_0AB1 - ResetScmFunctionStore(); + ScmFunction::Clear(); // clean up after opcode_0AC8 std::for_each(m_pAllocations.begin(), m_pAllocations.end(), free); @@ -403,7 +442,7 @@ namespace CLEO CLEO_RegisterOpcode(0x2000, opcode_2000); // resolve_filepath CLEO_RegisterOpcode(0x2001, opcode_2001); // get_script_filename CLEO_RegisterOpcode(0x2002, opcode_2002); // cleo_return_with - CLEO_RegisterOpcode(0x2003, opcode_2003); // cleo_return_false + CLEO_RegisterOpcode(0x2003, opcode_2003); // cleo_return_fail } void CCustomOpcodeSystem::Inject(CCodeInjector& inj) @@ -1151,130 +1190,6 @@ namespace CLEO return 
count; } - struct ScmFunction - { - unsigned short prevScmFunctionId, thisScmFunctionId; - void* savedBaseIP; - BYTE *retnAddress; - BYTE* savedStack[8]; // gosub stack - WORD savedSP; - SCRIPT_VAR savedTls[32]; - std::list stringParams; // texts with this scope lifetime - bool savedCondResult; - eLogicalOperation savedLogicalOp; - bool savedNotFlag; - static const size_t store_size = 0x400; - static ScmFunction *Store[store_size]; - static size_t allocationPlace; // contains an index of last allocated object - void* moduleExportRef = 0; // modules switching. Points to modules baseIP in case if this is export call - std::string savedScriptFileDir; // modules switching - std::string savedScriptFileName; // modules switching - - void *operator new(size_t size) - { - size_t start_search = allocationPlace; - while (Store[allocationPlace]) // find first unused position in store - { - if (++allocationPlace >= store_size) allocationPlace = 0; // end of store reached - if (allocationPlace == start_search) - { - SHOW_ERROR("CLEO function storage stack overfllow!"); - throw std::bad_alloc(); // the store is filled up - } - } - ScmFunction *obj = reinterpret_cast(::operator new(size)); - Store[allocationPlace] = obj; - return obj; - } - - void operator delete(void *mem) - { - Store[reinterpret_cast(mem)->thisScmFunctionId] = nullptr; - ::operator delete(mem); - } - - ScmFunction(CRunningScript *thread) : - prevScmFunctionId(reinterpret_cast(thread)->GetScmFunction()) - { - auto cs = reinterpret_cast(thread); - - // create snapshot of current scope - savedBaseIP = cs->BaseIP; - std::copy(std::begin(cs->Stack), std::end(cs->Stack), std::begin(savedStack)); - savedSP = cs->SP; - - auto scope = cs->IsMission() ? 
missionLocals : cs->LocalVar; - std::copy(scope, scope + 32, savedTls); - - savedCondResult = cs->bCondResult; - savedLogicalOp = cs->LogicalOp; - savedNotFlag = cs->NotFlag; - - savedScriptFileDir = cs->GetScriptFileDir(); - savedScriptFileName = cs->GetScriptFileName(); - - // init new scope - std::fill(std::begin(cs->Stack), std::end(cs->Stack), nullptr); - cs->SP = 0; - cs->bCondResult = false; - cs->LogicalOp = eLogicalOperation::NONE; - cs->NotFlag = false; - - cs->SetScmFunction(thisScmFunctionId = (unsigned short)allocationPlace); - } - - void Return(CRunningScript *thread) - { - auto cs = reinterpret_cast(thread); - - // restore parent scope's gosub call stack - cs->BaseIP = savedBaseIP; - std::copy(std::begin(savedStack), std::end(savedStack), std::begin(cs->Stack)); - cs->SP = savedSP; - - // restore parent scope's local variables - std::copy(savedTls, savedTls + 32, cs->IsMission() ? missionLocals : cs->LocalVar); - - // process conditional result of just ended function in parent scope - bool condResult = cs->bCondResult; - if (savedNotFlag) condResult = !condResult; - - if (savedLogicalOp >= eLogicalOperation::AND_2 && savedLogicalOp < eLogicalOperation::AND_END) - { - cs->bCondResult = savedCondResult && condResult; - cs->LogicalOp = --savedLogicalOp; - } - else if(savedLogicalOp >= eLogicalOperation::OR_2 && savedLogicalOp < eLogicalOperation::OR_END) - { - cs->bCondResult = savedCondResult || condResult; - cs->LogicalOp = --savedLogicalOp; - } - else // eLogicalOperation::NONE - { - cs->bCondResult = condResult; - cs->LogicalOp = savedLogicalOp; - } - - cs->SetScriptFileDir(savedScriptFileDir.c_str()); - cs->SetScriptFileName(savedScriptFileName.c_str()); - - cs->SetIp(retnAddress); - cs->SetScmFunction(prevScmFunctionId); - } - }; - - ScmFunction *ScmFunction::Store[store_size] = { /* default initializer - nullptr */ }; - size_t ScmFunction::allocationPlace = 0; - - void ResetScmFunctionStore() - { - for each(ScmFunction *scmFunc in 
ScmFunction::Store) - { - if (scmFunc) delete scmFunc; - } - ScmFunction::allocationPlace = 0; - } - /************************************************************************/ /* Opcode definitions */ /************************************************************************/ @@ -2174,10 +2089,17 @@ namespace CLEO return OR_CONTINUE; } - //0AB2=-1,ret + //0AB2=-1,cleo_return OpcodeResult __stdcall opcode_0AB2(CRunningScript *thread) { - ScmFunction *scmFunc = ScmFunction::Store[reinterpret_cast(thread)->GetScmFunction()]; + auto cs = reinterpret_cast(thread); + + ScmFunction* scmFunc = ScmFunction::Get(cs->GetScmFunction()); + if (scmFunc == nullptr) + { + SHOW_ERROR("Invalid Cleo Call reference. [0AB2] possibly used without preceding [0AB1] in script %s\nScript suspended.", cs->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } DWORD returnParamCount = GetVarArgCount(thread); if (returnParamCount) @@ -2185,19 +2107,19 @@ namespace CLEO auto paramType = (eDataType)*thread->GetBytePointer(); if (!IsImmInteger(paramType)) { - SHOW_ERROR("Invalid type of first argument in opcode [0AB2], in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); + SHOW_ERROR("Invalid type of first argument in opcode [0AB2], in script %s", cs->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } DWORD declaredParamCount; *thread >> declaredParamCount; if(returnParamCount - 1 < declaredParamCount) // minus 'num args' itself { - SHOW_ERROR("Opcode [0AB2] declared %d return args, but provided %d in script %s\nScript suspended.", declaredParamCount, returnParamCount - 1, ((CCustomScript*)thread)->GetInfoStr().c_str()); + SHOW_ERROR("Opcode [0AB2] declared %d return args, but provided %d in script %s\nScript suspended.", declaredParamCount, returnParamCount - 1, cs->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } else if (returnParamCount - 1 > declaredParamCount) // more args than needed, not 
critical { - LOG_WARNING(thread, "Opcode [0AB2] declared %d return args, but provided %d in script %s", declaredParamCount, returnParamCount - 1, ((CCustomScript*)thread)->GetInfoStr().c_str()); + LOG_WARNING(thread, "Opcode [0AB2] declared %d return args, but provided %d in script %s", declaredParamCount, returnParamCount - 1, cs->GetInfoStr().c_str()); } } if (returnParamCount) GetScriptParams(thread, returnParamCount); @@ -2210,12 +2132,12 @@ namespace CLEO if(returnParamCount) returnParamCount--; // do not count the 'num args' argument itself if (returnSlotCount > returnParamCount) { - SHOW_ERROR("Opcode [0AB2] returned %d params, while function caller expected %d in script %s\nScript suspended.", returnParamCount, returnSlotCount, ((CCustomScript*)thread)->GetInfoStr().c_str()); + SHOW_ERROR("Opcode [0AB2] returned %d params, while function caller expected %d in script %s\nScript suspended.", returnParamCount, returnSlotCount, cs->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } else if (returnSlotCount < returnParamCount) // more args than needed, not critical { - LOG_WARNING(thread, "Opcode [0AB2] returned %d params, while function caller expected %d in script %s", returnParamCount, returnSlotCount, ((CCustomScript*)thread)->GetInfoStr().c_str()); + LOG_WARNING(thread, "Opcode [0AB2] returned %d params, while function caller expected %d in script %s", returnParamCount, returnSlotCount, cs->GetInfoStr().c_str()); } if (returnSlotCount) SetScriptParams(thread, returnSlotCount); @@ -3084,44 +3006,31 @@ namespace CLEO //2002=-1, cleo_return_with ... OpcodeResult __stdcall opcode_2002(CRunningScript* thread) { - auto cs = reinterpret_cast(thread); - DWORD returnParamCount = GetVarArgCount(cs); - - if (returnParamCount) GetScriptParams(cs, returnParamCount); - - ScmFunction* scmFunc = ScmFunction::Store[cs->GetScmFunction()]; - scmFunc->Return(cs); // jump back to cleo_call, right after last input param. 
Return slot var args starts here - if (scmFunc->moduleExportRef != nullptr) GetInstance().ModuleSystem.ReleaseModuleRef((char*)scmFunc->moduleExportRef); // exiting export - release module - delete scmFunc; - - DWORD returnSlotCount = GetVarArgCount(cs); - if(returnParamCount != returnSlotCount) // new CLEO5 opcode, strict error checks + DWORD argCount = GetVarArgCount(thread); + if (argCount < 1) { - SHOW_ERROR("Opcode [2002] returned %d params, while function caller expected %d in script %s\nScript suspended.", returnParamCount, returnSlotCount, cs->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(cs); + SHOW_ERROR("Opcode [2002] missing condition result argument in script %s\nScript suspended.", ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); } - if (returnSlotCount) SetScriptParams(cs, returnSlotCount); - cs->IncPtr(); // skip var args + DWORD result; *thread >> result; + SetScriptCondResult(thread, result != 0); - SetScriptCondResult(cs, true); - return OR_CONTINUE; + return CCustomOpcodeSystem::CleoReturnGeneric(0x2002, thread, true); } - //2003=0, cleo_return_false + //2003=-1, cleo_return_fail OpcodeResult __stdcall opcode_2003(CRunningScript* thread) { - auto cs = reinterpret_cast(thread); - - ScmFunction* scmFunc = ScmFunction::Store[cs->GetScmFunction()]; - scmFunc->Return(cs); // jump back to cleo_call, right after last input param. 
Return slot var args starts here - if (scmFunc->moduleExportRef != nullptr) GetInstance().ModuleSystem.ReleaseModuleRef((char*)scmFunc->moduleExportRef); // exiting export - release module - delete scmFunc; - - SkipUnusedVarArgs(thread); // just exit without change of return params + DWORD argCount = GetVarArgCount(thread); + if (argCount != 0) // argument(s) not supported yet + { + SHOW_ERROR("Too many arguments of opcode [2003] in script %s\nScript suspended.", ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } - SetScriptCondResult(cs, false); - return OR_CONTINUE; + SetScriptCondResult(thread, false); + return CCustomOpcodeSystem::CleoReturnGeneric(0x2003, thread, false); } } diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index 4835b993..96913524 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -8,7 +8,6 @@ namespace CLEO { typedef OpcodeResult(__stdcall * CustomOpcodeHandler)(CRunningScript*); - void ResetScmFunctionStore(); bool is_legacy_handle(DWORD dwHandle); FILE * convert_handle_to_file(DWORD dwHandle); @@ -44,6 +43,7 @@ namespace CLEO static bool RegisterOpcode(WORD opcode, CustomOpcodeHandler callback); + static OpcodeResult CleoReturnGeneric(WORD opcode, CRunningScript* thread, bool returnArgs); static OpcodeResult ErrorSuspendScript(CRunningScript* thread); // suspend script execution forever private: diff --git a/source/ScmFunction.cpp b/source/ScmFunction.cpp new file mode 100644 index 00000000..0540d010 --- /dev/null +++ b/source/ScmFunction.cpp @@ -0,0 +1,119 @@ +#include "stdafx.h" +#include "ScmFunction.h" +#include "CCustomOpcodeSystem.h" +#include "CScriptEngine.h" + +namespace CLEO +{ + ScmFunction* ScmFunction::store[Store_Size] = { 0 }; + size_t ScmFunction::allocationPlace = 0; + + ScmFunction* ScmFunction::Get(unsigned short idx) + { + if (idx >= Store_Size) + return nullptr; + + return store[idx]; + } + + void 
ScmFunction::Clear() + { + for each (ScmFunction* scmFunc in store) + { + if (scmFunc != nullptr) delete scmFunc; + } + ScmFunction::allocationPlace = 0; + } + + void* ScmFunction::operator new(size_t size) + { + size_t start_search = allocationPlace; + while (store[allocationPlace] != nullptr) // find first unused position in store + { + if (++allocationPlace >= Store_Size) allocationPlace = 0; // end of store reached + if (allocationPlace == start_search) + { + SHOW_ERROR("CLEO function storage stack overfllow!"); + throw std::bad_alloc(); // the store is filled up + } + } + ScmFunction* obj = reinterpret_cast(::operator new(size)); + store[allocationPlace] = obj; + return obj; + } + + void ScmFunction::operator delete(void* mem) + { + store[reinterpret_cast(mem)->thisScmFunctionId] = nullptr; + ::operator delete(mem); + } + + ScmFunction::ScmFunction(CLEO::CRunningScript* thread) : + prevScmFunctionId(reinterpret_cast(thread)->GetScmFunction()) + { + auto cs = reinterpret_cast(thread); + + // create snapshot of current scope + savedBaseIP = cs->BaseIP; + std::copy(std::begin(cs->Stack), std::end(cs->Stack), std::begin(savedStack)); + savedSP = cs->SP; + + auto scope = cs->IsMission() ? 
missionLocals : cs->LocalVar; + std::copy(scope, scope + 32, savedTls); + + savedCondResult = cs->bCondResult; + savedLogicalOp = cs->LogicalOp; + savedNotFlag = cs->NotFlag; + + savedScriptFileDir = cs->GetScriptFileDir(); + savedScriptFileName = cs->GetScriptFileName(); + + // init new scope + std::fill(std::begin(cs->Stack), std::end(cs->Stack), nullptr); + cs->SP = 0; + cs->bCondResult = false; + cs->LogicalOp = eLogicalOperation::NONE; + cs->NotFlag = false; + + cs->SetScmFunction(thisScmFunctionId = (unsigned short)allocationPlace); + } + + void ScmFunction::Return(CRunningScript* thread) + { + auto cs = reinterpret_cast(thread); + + // restore parent scope's gosub call stack + cs->BaseIP = savedBaseIP; + std::copy(std::begin(savedStack), std::end(savedStack), std::begin(cs->Stack)); + cs->SP = savedSP; + + // restore parent scope's local variables + std::copy(savedTls, savedTls + 32, cs->IsMission() ? missionLocals : cs->LocalVar); + + // process conditional result of just ended function in parent scope + bool condResult = cs->bCondResult; + if (savedNotFlag) condResult = !condResult; + + if (savedLogicalOp >= eLogicalOperation::AND_2 && savedLogicalOp < eLogicalOperation::AND_END) + { + cs->bCondResult = savedCondResult && condResult; + cs->LogicalOp = --savedLogicalOp; + } + else if (savedLogicalOp >= eLogicalOperation::OR_2 && savedLogicalOp < eLogicalOperation::OR_END) + { + cs->bCondResult = savedCondResult || condResult; + cs->LogicalOp = --savedLogicalOp; + } + else // eLogicalOperation::NONE + { + cs->bCondResult = condResult; + cs->LogicalOp = savedLogicalOp; + } + + cs->SetScriptFileDir(savedScriptFileDir.c_str()); + cs->SetScriptFileName(savedScriptFileName.c_str()); + + cs->SetIp(retnAddress); + cs->SetScmFunction(prevScmFunctionId); + } +}; diff --git a/source/ScmFunction.h b/source/ScmFunction.h new file mode 100644 index 00000000..31570552 --- /dev/null +++ b/source/ScmFunction.h @@ -0,0 +1,39 @@ +#pragma once +#include "..\cleo_sdk\CLEO.h" 
+#include "CDebug.h" + +#include +#include +#include + +namespace CLEO +{ + struct ScmFunction + { + static const size_t Store_Size = 0x400; + static ScmFunction* store[Store_Size]; + static size_t allocationPlace; // contains an index of last allocated object + static ScmFunction* Get(unsigned short idx); + static void Clear(); + + unsigned short prevScmFunctionId, thisScmFunctionId; + void* savedBaseIP; + BYTE* retnAddress; + BYTE* savedStack[8]; // gosub stack + WORD savedSP; + SCRIPT_VAR savedTls[32]; + std::list stringParams; // texts with this scope lifetime + bool savedCondResult; + eLogicalOperation savedLogicalOp; + bool savedNotFlag; + void* moduleExportRef = 0; // modules switching. Points to modules baseIP in case if this is export call + std::string savedScriptFileDir; // modules switching + std::string savedScriptFileName; // modules switching + + void* operator new(size_t size); + void operator delete(void* mem); + ScmFunction(CRunningScript* thread); + + void Return(CRunningScript* thread); + }; +} From 91e78cdfbc45342cad0592e36f305f3a75f64184 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Tue, 2 Jan 2024 21:34:17 +0100 Subject: [PATCH 081/216] Return strings support (#51) --- CHANGELOG.md | 1 + cleo_sdk/CLEO.h | 2 +- source/CCustomOpcodeSystem.cpp | 279 +++++++++++++++++++++------------ source/CCustomOpcodeSystem.h | 16 +- 4 files changed, 193 insertions(+), 105 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f489763c..c2669204 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ - 'argument count' parameter of **0AB1 (cleo_call)** is now optional. `cleo_call @LABEL args 0` can be written as `cleo_call @LABEL` - 'argument count' parameter of **0AB2 (cleo_return)** is now optional. 
`cleo_return 0` can be written as `cleo_return` - opcodes **0AAB**, **0AE4**, **0AE5**, **0AE6**, **0AE7** and **0AE8** moved to the [FileSystemOperations](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/FileSystemOperations) plugin + - **cleo_return_\*** opcodes now can pass strings as return arguments - SCM functions **(0AB1)** now keep their own GOSUB's call stack - new opcode **0B1E ([sign_extend](https://library.sannybuilder.com/#/sa/bitwise/0B1E))** - changes in file operations diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index b6c37259..2dac718e 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -47,7 +47,7 @@ enum eGameVersion : int }; // operand types -enum eDataType : int +enum eDataType : BYTE { DT_END, // variable args end marker DT_DWORD, // literal int 32 diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 482dba5b..38fc6d94 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -9,10 +9,11 @@ #include #include +#include -#define OPCODE_VALIDATE_STR_ARG_READ(x) if((void*)x == nullptr) { SHOW_ERROR("%s in script %s \nScript suspended.", lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } -#define OPCODE_VALIDATE_STR_ARG_WRITE(x) if((void*)x == nullptr) { SHOW_ERROR("%s in script %s \nScript suspended.", lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } -#define OPCODE_READ_FORMATTED_STRING(thread, buf, bufSize, format) if(ReadFormattedString(thread, buf, bufSize, format) == -1) { SHOW_ERROR("%s in script %s \nScript suspended.", lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } +#define OPCODE_VALIDATE_STR_ARG_READ(x) if((void*)x == nullptr) { SHOW_ERROR("%s in script %s \nScript suspended.", CLEO::lastErrorMsg.c_str(), 
((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } +#define OPCODE_VALIDATE_STR_ARG_WRITE(x) if((void*)x == nullptr) { SHOW_ERROR("%s in script %s \nScript suspended.", CLEO::lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } +#define OPCODE_READ_FORMATTED_STRING(thread, buf, bufSize, format) if(ReadFormattedString(thread, buf, bufSize, format) == -1) { SHOW_ERROR("%s in script %s \nScript suspended.", CLEO::lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } namespace CLEO { @@ -264,44 +265,6 @@ namespace CLEO return (callbackResult != OR_NONE) ? callbackResult : result; } - OpcodeResult CCustomOpcodeSystem::CleoReturnGeneric(WORD opcode, CRunningScript* thread, bool returnArgs) - { - auto cs = reinterpret_cast(thread); - - ScmFunction* scmFunc = ScmFunction::Get(cs->GetScmFunction()); - if (scmFunc == nullptr) - { - SHOW_ERROR("Invalid Cleo Call reference. [%04X] possibly used without preceding [0AB1] in script %s\nScript suspended.", opcode, cs->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); - } - - DWORD returnParamCount = 0; - if(returnArgs) - { - returnParamCount = GetVarArgCount(cs); - if (returnParamCount) GetScriptParams(cs, returnParamCount); - } - - scmFunc->Return(cs); // jump back to cleo_call, right after last input param. 
Return slot var args starts here - if (scmFunc->moduleExportRef != nullptr) GetInstance().ModuleSystem.ReleaseModuleRef((char*)scmFunc->moduleExportRef); // exiting export - release module - delete scmFunc; - - if (returnArgs) - { - DWORD returnSlotCount = GetVarArgCount(cs); - if (returnParamCount != returnSlotCount) // new CLEO5 opcode, strict error checks - { - SHOW_ERROR("Opcode [%04X] returned %d params, while function caller expected %d in script %s\nScript suspended.", opcode, returnParamCount, returnSlotCount, cs->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(cs); - } - - if (returnSlotCount) SetScriptParams(cs, returnSlotCount); - cs->IncPtr(); // skip var args - } - - return OR_CONTINUE; - } - OpcodeResult CCustomOpcodeSystem::ErrorSuspendScript(CRunningScript* thread) { //thread->SetActive(false): // will crash game if no active script left @@ -717,6 +680,10 @@ namespace CLEO } else { + size_t maxSize = 16 + 1; // long string and terminator + maxSize = min(maxSize, bufSize); + ZeroMemory(buf, maxSize); + GetScriptStringParam(thread, buf, (BYTE)min(bufSize, 0xFF)); // standard game's function } @@ -733,23 +700,46 @@ namespace CLEO bool WriteStringParam(CRunningScript* thread, const char* str) { auto target = GetStringParamWriteBuffer(thread); + return WriteStringParam(target, str); + } + + bool WriteStringParam(const StringParamBufferInfo& target, const char* str) + { + lastErrorMsg.clear(); - if(target.first != nullptr && target.second > 0) + if (str != nullptr && (size_t)str <= CCustomOpcodeSystem::MinValidAddress) { - size_t length = str == nullptr ? 
0 : strlen(str); - length = min(length, target.second - 1); // and null terminator + lastErrorMsg = stringPrintf("Writing string from invalid '0x%X' pointer", target.data); + return false; + } - if (length > 0) std::memcpy(target.first, str, length); - target.first[length] = '\0'; + if ((size_t)target.data <= CCustomOpcodeSystem::MinValidAddress) + { + lastErrorMsg = stringPrintf("Writing string into invalid '0x%X' pointer argument", target.data); + return false; + } - return true; // ok + if (target.size == 0) + { + return false; } - return false; // failed + bool addTerminator = target.needTerminator; + size_t buffLen = target.size - addTerminator; + size_t length = str == nullptr ? 0 : strlen(str); + + if (buffLen > length) addTerminator = true; // there is space left for terminator + + length = min(length, buffLen); + if (length > 0) std::memcpy(target.data, str, length); + if (addTerminator) target.data[length] = '\0'; + + return true; } - std::pair GetStringParamWriteBuffer(CRunningScript* thread) + StringParamBufferInfo GetStringParamWriteBuffer(CRunningScript* thread) { + StringParamBufferInfo result; lastErrorMsg.clear(); auto paramType = CLEO_GetOperandType(thread); @@ -761,9 +751,14 @@ namespace CLEO if (opcodeParams[0].dwParam <= CCustomOpcodeSystem::MinValidAddress) { lastErrorMsg = stringPrintf("Writing string into invalid '0x%X' pointer argument", opcodeParams[0].dwParam); - return { nullptr, 0 }; // error + return result; // error } - return { opcodeParams[0].pcParam, 0x7FFFFFFF }; // user allocated memory block can be any size + + result.data = opcodeParams[0].pcParam; + result.size = 0x7FFFFFFF; // user allocated memory block can be any size + result.needTerminator = true; + + return result; } else if (IsVarString(paramType)) @@ -775,20 +770,26 @@ namespace CLEO case DT_LVAR_TEXTLABEL: case DT_VAR_TEXTLABEL_ARRAY: case DT_LVAR_TEXTLABEL_ARRAY: - return { (char*)GetScriptParamPointer(thread), 8 }; + result.data = 
(char*)GetScriptParamPointer(thread); + result.size = 8; + result.needTerminator = false; + return result; // long string variable case DT_VAR_STRING: case DT_LVAR_STRING: case DT_VAR_STRING_ARRAY: case DT_LVAR_STRING_ARRAY: - return { (char*)GetScriptParamPointer(thread), 16 }; + result.data = (char*)GetScriptParamPointer(thread); + result.size = 16; + result.needTerminator = false; + return result; } } lastErrorMsg = stringPrintf("Writing string, got argument %s", ToKindStr(paramType)); CLEO_SkipOpcodeParams(thread, 1); // skip unhandled param - return { nullptr, 0 }; // error + return result; // error } // perform 'sprintf'-operation for parameters, passed through SCM @@ -992,6 +993,116 @@ namespace CLEO return -1; // error } + OpcodeResult CCustomOpcodeSystem::CleoReturnGeneric(WORD opcode, CRunningScript* thread, bool returnArgs, DWORD returnArgCount, bool strictArgCount) + { + auto cs = reinterpret_cast(thread); + + ScmFunction* scmFunc = ScmFunction::Get(cs->GetScmFunction()); + if (scmFunc == nullptr) + { + SHOW_ERROR("Invalid Cleo Call reference. 
[%04X] possibly used without preceding [0AB1] in script %s\nScript suspended.", opcode, cs->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } + + // store return arguments + static SCRIPT_VAR arguments[32]; + static bool argumentIsStr[32]; + std::forward_list stringParams; // scope guard for strings + if (returnArgs) + { + if (returnArgCount > 32) + { + SHOW_ERROR("Opcode [%04X] has too many (%d) args in script %s\nScript suspended.", opcode, returnArgCount, cs->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } + + auto nVarArg = GetVarArgCount(thread); + if (returnArgCount > nVarArg) + { + SHOW_ERROR("Opcode [%04X] declared %d args, but %d was provided in script %s\nScript suspended.", opcode, returnArgCount, nVarArg, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } + + for (DWORD i = 0; i < returnArgCount; i++) + { + SCRIPT_VAR* arg = arguments + i; + argumentIsStr[i] = false; + + auto paramType = (eDataType)*thread->GetBytePointer(); + if (IsImmInteger(paramType) || IsVariable(paramType)) + { + *thread >> arg->dwParam; + } + else if (paramType == DT_FLOAT) + { + *thread >> arg->fParam; + } + else if (IsImmString(paramType) || IsVarString(paramType)) + { + argumentIsStr[i] = true; + + auto str = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(str) + stringParams.emplace_front(str); + arg->pcParam = stringParams.front().data(); + } + else + { + SHOW_ERROR("Invalid argument type '0x%02X' in opcode [%04X] in script %s\nScript suspended.", paramType, opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } + } + } + + // handle program flow + scmFunc->Return(cs); // jump back to cleo_call, right after last input param. 
Return slot var args starts here + if (scmFunc->moduleExportRef != nullptr) GetInstance().ModuleSystem.ReleaseModuleRef((char*)scmFunc->moduleExportRef); // exiting export - release module + delete scmFunc; + + if (returnArgs) + { + DWORD returnSlotCount = GetVarArgCount(cs); + if (returnSlotCount > returnArgCount || (strictArgCount && returnSlotCount < returnArgCount)) + { + SHOW_ERROR("Opcode [%04X] returned %d params, while function caller expected %d in script %s\nScript suspended.", opcode, returnArgCount, returnSlotCount, cs->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(cs); + } + else if (returnSlotCount < returnArgCount) + { + LOG_WARNING(thread, "Opcode [%04X] returned %d params, while function caller expected %d in script %s", opcode, returnArgCount, returnSlotCount, cs->GetInfoStr().c_str()); + } + + // set return args + for (DWORD i = 0; i < returnArgCount; i++) + { + auto arg = (SCRIPT_VAR*)thread->GetBytePointer(); + + auto paramType = *(eDataType*)arg; + if (IsVarString(paramType)) + { + WriteStringParam(thread, arguments[i].pcParam); + } + else if (IsVariable(paramType)) + { + if (argumentIsStr[i]) // source was string, write it into provided buffer ptr + { + auto ok = WriteStringParam(thread, arguments[i].pcParam); OPCODE_VALIDATE_STR_ARG_WRITE(ok) + } + else + *thread << arguments[i].dwParam; + } + else + { + SHOW_ERROR("Invalid output argument type '0x%02X' in opcode [%04X] in script %s\nScript suspended.", paramType, opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } + } + } + + SkipUnusedVarArgs(thread); // skip var args terminator too + + return OR_CONTINUE; + } + // Legacy modes for CLEO 3 FILE* legacy_fopen(const char* szPath, const char* szMode) { @@ -2092,58 +2203,31 @@ namespace CLEO //0AB2=-1,cleo_return OpcodeResult __stdcall opcode_0AB2(CRunningScript *thread) { - auto cs = reinterpret_cast(thread); - - ScmFunction* scmFunc = 
ScmFunction::Get(cs->GetScmFunction()); - if (scmFunc == nullptr) - { - SHOW_ERROR("Invalid Cleo Call reference. [0AB2] possibly used without preceding [0AB1] in script %s\nScript suspended.", cs->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); - } - DWORD returnParamCount = GetVarArgCount(thread); if (returnParamCount) { auto paramType = (eDataType)*thread->GetBytePointer(); if (!IsImmInteger(paramType)) { - SHOW_ERROR("Invalid type of first argument in opcode [0AB2], in script %s", cs->GetInfoStr().c_str()); + SHOW_ERROR("Invalid type of first argument in opcode [0AB2], in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } DWORD declaredParamCount; *thread >> declaredParamCount; - if(returnParamCount - 1 < declaredParamCount) // minus 'num args' itself + if (returnParamCount - 1 < declaredParamCount) // minus 'num args' itself { - SHOW_ERROR("Opcode [0AB2] declared %d return args, but provided %d in script %s\nScript suspended.", declaredParamCount, returnParamCount - 1, cs->GetInfoStr().c_str()); + SHOW_ERROR("Opcode [0AB2] declared %d return args, but provided %d in script %s\nScript suspended.", declaredParamCount, returnParamCount - 1, ((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } else if (returnParamCount - 1 > declaredParamCount) // more args than needed, not critical { - LOG_WARNING(thread, "Opcode [0AB2] declared %d return args, but provided %d in script %s", declaredParamCount, returnParamCount - 1, cs->GetInfoStr().c_str()); + LOG_WARNING(thread, "Opcode [0AB2] declared %d return args, but provided %d in script %s", declaredParamCount, returnParamCount - 1, ((CCustomScript*)thread)->GetInfoStr().c_str()); } - } - if (returnParamCount) GetScriptParams(thread, returnParamCount); - scmFunc->Return(thread); // jump back to cleo_call, right after last input param. 
Return slot var args starts here - if (scmFunc->moduleExportRef != nullptr) GetInstance().ModuleSystem.ReleaseModuleRef((char*)scmFunc->moduleExportRef); // export - release module - delete scmFunc; - - DWORD returnSlotCount = GetVarArgCount(thread); - if(returnParamCount) returnParamCount--; // do not count the 'num args' argument itself - if (returnSlotCount > returnParamCount) - { - SHOW_ERROR("Opcode [0AB2] returned %d params, while function caller expected %d in script %s\nScript suspended.", returnParamCount, returnSlotCount, cs->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); - } - else if (returnSlotCount < returnParamCount) // more args than needed, not critical - { - LOG_WARNING(thread, "Opcode [0AB2] returned %d params, while function caller expected %d in script %s", returnParamCount, returnSlotCount, cs->GetInfoStr().c_str()); + returnParamCount = declaredParamCount; } - if (returnSlotCount) SetScriptParams(thread, returnSlotCount); - thread->IncPtr(); // skip var args terminator - - return OR_CONTINUE; + return GetInstance().OpcodeSystem.CleoReturnGeneric(0x0AB2, thread, true, returnParamCount); } //0AB3=2,var %1d% = %2d% @@ -2518,19 +2602,11 @@ namespace CLEO //0AD3=-1,string %1d% format %2d% ... OpcodeResult __stdcall opcode_0AD3(CRunningScript *thread) { - auto resultArg = GetStringParamWriteBuffer(thread); OPCODE_VALIDATE_STR_ARG_WRITE(resultArg.first) + auto resultArg = GetStringParamWriteBuffer(thread); OPCODE_VALIDATE_STR_ARG_WRITE(resultArg.data) auto format = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(format) char text[MAX_STR_LEN]; OPCODE_READ_FORMATTED_STRING(thread, text, sizeof(text), format) - if (resultArg.first != nullptr && resultArg.second > 0) - { - size_t length = text == nullptr ? 
0 : strlen(text); - length = min(length, resultArg.second - 1); // and null terminator - - if (length > 0) std::memcpy(resultArg.first, text, length); - resultArg.first[length] = '\0'; - } - + WriteStringParam(resultArg, text); return OR_CONTINUE; } @@ -2916,9 +2992,9 @@ namespace CLEO // this opcode is useless now float val; *thread >> val; auto format = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(format) - auto resultArg = GetStringParamWriteBuffer(thread); OPCODE_VALIDATE_STR_ARG_WRITE(resultArg.first) + auto resultArg = GetStringParamWriteBuffer(thread); OPCODE_VALIDATE_STR_ARG_WRITE(resultArg.data) - sprintf(resultArg.first, format, val); + sprintf_s(resultArg.data, resultArg.size, format, val); return OR_CONTINUE; } @@ -3014,9 +3090,10 @@ namespace CLEO } DWORD result; *thread >> result; + argCount--; SetScriptCondResult(thread, result != 0); - return CCustomOpcodeSystem::CleoReturnGeneric(0x2002, thread, true); + return GetInstance().OpcodeSystem.CleoReturnGeneric(0x2002, thread, true, argCount); } //2003=-1, cleo_return_fail @@ -3030,7 +3107,7 @@ namespace CLEO } SetScriptCondResult(thread, false); - return CCustomOpcodeSystem::CleoReturnGeneric(0x2003, thread, false); + return GetInstance().OpcodeSystem.CleoReturnGeneric(0x2003, thread); } } diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index 96913524..11eb5b7b 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -43,7 +43,7 @@ namespace CLEO static bool RegisterOpcode(WORD opcode, CustomOpcodeHandler callback); - static OpcodeResult CleoReturnGeneric(WORD opcode, CRunningScript* thread, bool returnArgs); + OpcodeResult CleoReturnGeneric(WORD opcode, CRunningScript* thread, bool returnArgs = false, DWORD returnArgCount = 0, bool strictArgCount = true); static OpcodeResult ErrorSuspendScript(CRunningScript* thread); // suspend script execution forever private: @@ -73,10 +73,20 @@ namespace CLEO extern void(__thiscall * 
ProcessScript)(CRunningScript*); + struct StringParamBufferInfo + { + char* data = nullptr; + DWORD size = 0; + bool needTerminator = false; + }; + char* ReadStringParam(CRunningScript* thread, char* buf = nullptr, DWORD bufSize = 0); - bool WriteStringParam(CRunningScript* thread, const char* str); - std::pair GetStringParamWriteBuffer(CRunningScript* thread); // consumes the param + StringParamBufferInfo GetStringParamWriteBuffer(CRunningScript* thread); // consumes the param int ReadFormattedString(CRunningScript* thread, char* buf, DWORD bufSize, const char* format); + + bool WriteStringParam(CRunningScript* thread, const char* str); + bool WriteStringParam(const StringParamBufferInfo& target, const char* str); + void SkipUnusedVarArgs(CRunningScript* thread); // for var-args opcodes DWORD GetVarArgCount(CRunningScript* thread); // for var-args opcodes } From 2606b2e5287faf75f949904a024edd3c32764719 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sat, 6 Jan 2024 17:11:14 +0100 Subject: [PATCH 082/216] forget_memory opcode (#53) forget_memory opcode added. 0AC8 returns zero filled blocks --- CHANGELOG.md | 1 + source/CCustomOpcodeSystem.cpp | 56 ++++++++++++++++++++++++++++++---- source/CCustomOpcodeSystem.h | 1 + 3 files changed, 52 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c2669204..fbb08539 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ - **2001 ([get_script_filename](https://library.sannybuilder.com/#/sa/CLEO/2001))** - **2002 ([cleo_return_with](https://library.sannybuilder.com/#/sa/CLEO/2002))** - **2003 ([cleo_return_fail](https://library.sannybuilder.com/#/sa/CLEO/2003))** + - **2004 ([forget_memory](https://library.sannybuilder.com/#/sa/CLEO/2004))** - 'argument count' parameter of **0AB1 (cleo_call)** is now optional. `cleo_call @LABEL args 0` can be written as `cleo_call @LABEL` - 'argument count' parameter of **0AB2 (cleo_return)** is now optional. 
`cleo_return 0` can be written as `cleo_return` - opcodes **0AAB**, **0AE4**, **0AE5**, **0AE6**, **0AE7** and **0AE8** moved to the [FileSystemOperations](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/FileSystemOperations) plugin diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 38fc6d94..c012a2d4 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -130,6 +130,7 @@ namespace CLEO OpcodeResult __stdcall opcode_2001(CRunningScript* thread); // get_script_filename OpcodeResult __stdcall opcode_2002(CRunningScript* thread); // cleo_return_with OpcodeResult __stdcall opcode_2003(CRunningScript* thread); // cleo_return_fail + OpcodeResult __stdcall opcode_2004(CRunningScript* thread); // forget_memory typedef void(*FuncScriptDeleteDelegateT) (CRunningScript *script); struct ScriptDeleteDelegate { @@ -406,6 +407,7 @@ namespace CLEO CLEO_RegisterOpcode(0x2001, opcode_2001); // get_script_filename CLEO_RegisterOpcode(0x2002, opcode_2002); // cleo_return_with CLEO_RegisterOpcode(0x2003, opcode_2003); // cleo_return_fail + CLEO_RegisterOpcode(0x2004, opcode_2004); // forget_memory } void CCustomOpcodeSystem::Inject(CCodeInjector& inj) @@ -2472,10 +2474,19 @@ namespace CLEO //0AC8=2,%2d% = allocate_memory_size %1d% OpcodeResult __stdcall opcode_0AC8(CRunningScript *thread) { - DWORD size; - *thread >> size; - void *mem = malloc(size); - if (mem) GetInstance().OpcodeSystem.m_pAllocations.insert(mem); + DWORD size; *thread >> size; + + void* mem = calloc(size, 1); + if (mem) + { + DWORD oldProtect; + VirtualProtect(mem, size, PAGE_EXECUTE_READWRITE, &oldProtect); + + GetInstance().OpcodeSystem.m_pAllocations.insert(mem); + } + else + LOG_WARNING(thread, "[0AC8] failed to allocate %d bytes of memory in script %s", size, ((CCustomScript*)thread)->GetInfoStr().c_str()); + *thread << mem; SetScriptCondResult(thread, mem != nullptr); return OR_CONTINUE; @@ -2484,14 +2495,24 @@ namespace CLEO 
//0AC9=1,free_allocated_memory %1d% OpcodeResult __stdcall opcode_0AC9(CRunningScript *thread) { - void *mem; - *thread >> mem; + void *mem; *thread >> mem; + + if ((size_t)mem <= CCustomOpcodeSystem::MinValidAddress) + { + SHOW_ERROR("[0AC9] used with invalid '0x%X' pointer argument in script %s\nScript suspended.", mem, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } + + // allocated with 0AC8 auto & allocs = GetInstance().OpcodeSystem.m_pAllocations; if (allocs.find(mem) != allocs.end()) { free(mem); allocs.erase(mem); + return OR_CONTINUE; // done } + + LOG_WARNING(thread, "[0AC9] used with pointer to unknown or already freed memory in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); return OR_CONTINUE; } @@ -3109,6 +3130,29 @@ namespace CLEO SetScriptCondResult(thread, false); return GetInstance().OpcodeSystem.CleoReturnGeneric(0x2003, thread); } + + //2004=1,forget_memory %1d% + OpcodeResult __stdcall opcode_2004(CRunningScript* thread) + { + void* mem; *thread >> mem; + + if ((size_t)mem <= CCustomOpcodeSystem::MinValidAddress) + { + SHOW_ERROR("[2004] used with invalid '0x%X' pointer argument in script %s\nScript suspended.", mem, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } + + // allocated with 0AC8 + auto& allocs = GetInstance().OpcodeSystem.m_pAllocations; + if (allocs.find(mem) != allocs.end()) + { + allocs.erase(mem); + return OR_CONTINUE; // done + } + + LOG_WARNING(thread, "[2004] used with pointer to unknown or already freed memory in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); + return OR_CONTINUE; + } } diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index 11eb5b7b..8168792f 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -53,6 +53,7 @@ namespace CLEO friend OpcodeResult __stdcall opcode_0AA3(CRunningScript *pScript); friend 
OpcodeResult __stdcall opcode_0AC8(CRunningScript *pScript); friend OpcodeResult __stdcall opcode_0AC9(CRunningScript *pScript); + friend OpcodeResult __stdcall opcode_2004(CRunningScript* pScript); std::set m_hFiles; std::set m_hNativeLibs; From d560e8316db11adeeb94710f5e9639795986f752 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sat, 6 Jan 2024 22:41:41 +0100 Subject: [PATCH 083/216] Parameters validation in call_function opcodes. (#56) Opcodes 0AA5-0AA8 are now based on a single generic function. Error checks in call_function opcodes. --- cleo_sdk/CLEO.h | 3 +- source/CCustomOpcodeSystem.cpp | 410 ++++++++++++--------------------- source/CCustomOpcodeSystem.h | 12 +- 3 files changed, 159 insertions(+), 266 deletions(-) diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index 2dac718e..653c2823 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -367,7 +367,8 @@ struct CRunningScript bool GetNotFlag() const { return NotFlag; } void SetNotFlag(bool state) { NotFlag = state; } - char ReadDataType() { return ReadDataByte(); } + eDataType PeekDataType() const { return *(eDataType*)CurrentIP; } + eDataType ReadDataType() { return (eDataType)ReadDataByte(); } short ReadDataVarIndex() { return ReadDataWord(); } short ReadDataArrayOffset() { return ReadDataWord(); } short ReadDataArrayIndex() { return ReadDataWord(); } diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index c012a2d4..33c22058 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -11,12 +11,17 @@ #include #include -#define OPCODE_VALIDATE_STR_ARG_READ(x) if((void*)x == nullptr) { SHOW_ERROR("%s in script %s \nScript suspended.", CLEO::lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } -#define OPCODE_VALIDATE_STR_ARG_WRITE(x) if((void*)x == nullptr) { SHOW_ERROR("%s in script %s \nScript suspended.", CLEO::lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return
CCustomOpcodeSystem::ErrorSuspendScript(thread); } -#define OPCODE_READ_FORMATTED_STRING(thread, buf, bufSize, format) if(ReadFormattedString(thread, buf, bufSize, format) == -1) { SHOW_ERROR("%s in script %s \nScript suspended.", CLEO::lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } +#define OPCODE_VALIDATE_STR_ARG_READ(x) if((void*)x == nullptr) { SHOW_ERROR("%s in script %s \nScript suspended.", CCustomOpcodeSystem::lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } +#define OPCODE_VALIDATE_STR_ARG_WRITE(x) if((void*)x == nullptr) { SHOW_ERROR("%s in script %s \nScript suspended.", CCustomOpcodeSystem::lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } +#define OPCODE_READ_FORMATTED_STRING(thread, buf, bufSize, format) if(ReadFormattedString(thread, buf, bufSize, format) == -1) { SHOW_ERROR("%s in script %s \nScript suspended.", CCustomOpcodeSystem::lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } namespace CLEO { + template inline CRunningScript& operator>>(CRunningScript& thread, T*& pval); + template inline CRunningScript& operator<<(CRunningScript& thread, T* pval); + template inline CRunningScript& operator<<(CRunningScript& thread, memory_pointer pval); + template inline CRunningScript& operator>>(CRunningScript& thread, memory_pointer& pval); + DWORD FUNC_fopen; DWORD FUNC_fclose; DWORD FUNC_fwrite; @@ -196,10 +201,9 @@ namespace CLEO WORD CCustomOpcodeSystem::lastOpcode = 0; WORD* CCustomOpcodeSystem::lastOpcodePtr = nullptr; WORD CCustomOpcodeSystem::lastCustomOpcode = 0; - std::string lastErrorMsg = {}; + std::string CCustomOpcodeSystem::lastErrorMsg = {}; WORD CCustomOpcodeSystem::prevOpcode = 0; - // opcode handler for custom opcodes 
OpcodeResult __fastcall CCustomOpcodeSystem::customOpcodeHandler(CRunningScript *thread, int dummy, WORD opcode) { @@ -239,7 +243,7 @@ namespace CLEO if (opcode > LastOriginalOpcode) { - SHOW_ERROR("Opcode [%04X] not registered! \nCalled in script %s\nScript suspended.", opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); + SHOW_ERROR("Opcode [%04X] not registered! \nCalled in script %s\nPreviously called opcode: [%04X]\nScript suspended.", opcode, ((CCustomScript*)thread)->GetInfoStr().c_str(), prevOpcode); return ErrorSuspendScript(thread); } @@ -266,6 +270,117 @@ namespace CLEO return (callbackResult != OR_NONE) ? callbackResult : result; } + OpcodeResult CCustomOpcodeSystem::CallFunctionGeneric(WORD opcode, CRunningScript* thread, bool thisCall, bool returnArg) + { + void* func; *thread >> func; + void* struc = nullptr; if(thisCall) *thread >> struc; + DWORD numParams; *thread >> numParams; + DWORD stackAlign; *thread >> stackAlign; // pop + + if ((size_t)func <= CCustomOpcodeSystem::MinValidAddress) + { + SHOW_ERROR("Invalid '0x%X' function pointer param of opcode [%04X] in script %s\nScript suspended.", func, opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } + + if (thisCall && (size_t)struc <= CCustomOpcodeSystem::MinValidAddress) + { + SHOW_ERROR("Invalid '0x%X' struct pointer param of opcode [%04X] in script %s\nScript suspended.", struc, opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } + + int nVarArg = GetVarArgCount(thread); + if (numParams + returnArg != nVarArg) // and return argument + { + SHOW_ERROR("Opcode [%04X] declared %d input args, but provided %d in script %s\nScript suspended.", opcode, numParams, (int)nVarArg - returnArg, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } + + constexpr size_t Max_Args = 32; + if (numParams > Max_Args) + { + 
SHOW_ERROR("Opcode [%04X] used with more than supported arguments in script %s\nScript suspended.", opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } + + static SCRIPT_VAR arguments[Max_Args] = { 0 }; + SCRIPT_VAR* arguments_end = arguments + numParams; + + constexpr size_t Max_Text_Params = 5; + static char textParams[Max_Text_Params][MAX_STR_LEN]; + size_t currTextParam = 0; + + stackAlign *= 4; // bytes peer argument + + // retrieve parameters + for (size_t i = 0; i < numParams; i++) + { + auto& param = arguments[i]; + auto paramType = thread->PeekDataType(); + + if (IsImmInteger(paramType) || IsVariable(paramType)) + *thread >> param.dwParam; + else + if (IsImmFloat(paramType)) + *thread >> param.fParam; + else + if (IsImmString(paramType) || IsVarString(paramType)) + { + if (currTextParam >= Max_Text_Params) + { + SHOW_ERROR("Opcode [%04X] used with more than supported string arguments in script %s\nScript suspended.", opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } + + param.pcParam = ReadStringParam(thread, textParams[currTextParam], MAX_STR_LEN); OPCODE_VALIDATE_STR_ARG_READ(param.pcParam) + currTextParam++; + } + else + { + SHOW_ERROR("Invalid param type (%s) in opcode [%04X] in script %s \nScript suspended.", opcode, ToKindStr(paramType), ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } + } + + // validate return target variable + if (returnArg) + { + auto paramType = thread->PeekDataType(); + + if (!IsVariable(paramType) && !IsVarString(paramType)) + { + SHOW_ERROR("Invalid return param type (%s) in opcode [%04X] in script %s \nScript suspended.", opcode, ToKindStr(paramType), ((CCustomScript*)thread)->GetInfoStr().c_str()); + return CCustomOpcodeSystem::ErrorSuspendScript(thread); + } + } + + DWORD result; + _asm + { + // transfer args to stack + lea ecx, 
arguments + call_func_loop: + cmp ecx, arguments_end + jae call_func_loop_end + push[ecx] + add ecx, 0x4 + jmp call_func_loop + call_func_loop_end: + + // call function + mov ecx, struc + xor eax, eax + call func + mov result, eax // get result + add esp, stackAlign // cleanup stack + } + + if (returnArg) *thread << result; + + SkipUnusedVarArgs(thread); + return OR_CONTINUE; + } + OpcodeResult CCustomOpcodeSystem::ErrorSuspendScript(CRunningScript* thread) { //thread->SetActive(false): // will crash game if no active script left @@ -627,7 +742,7 @@ namespace CLEO if (!buf) { buf = internal_buf; bufSize = MAX_STR_LEN; } const auto bufLength = bufSize ? bufSize - 1 : 0; // max text length (minus terminator char) - lastErrorMsg.clear(); + CCustomOpcodeSystem::lastErrorMsg.clear(); auto paramType = CLEO_GetOperandType(thread); if (IsImmInteger(paramType) || IsVariable(paramType)) // TODO: it is possible to differentiate between int/float arrays @@ -636,7 +751,7 @@ namespace CLEO if (opcodeParams[0].dwParam <= CCustomOpcodeSystem::MinValidAddress) { - lastErrorMsg = (opcodeParams[0].dwParam == 0) ? + CCustomOpcodeSystem::lastErrorMsg = (opcodeParams[0].dwParam == 0) ? 
"Reading string from 'null' pointer argument" : stringPrintf("Reading string from invalid '0x%X' pointer argument", opcodeParams[0].dwParam); @@ -648,7 +763,7 @@ namespace CLEO if (length > bufLength) { - lastErrorMsg = stringPrintf("Target buffer too small (%d) to read whole string (%d) from argument", bufLength, length); + CCustomOpcodeSystem::lastErrorMsg = stringPrintf("Target buffer too small (%d) to read whole string (%d) from argument", bufLength, length); length = bufLength; // clamp to target buffer size } @@ -673,7 +788,7 @@ namespace CLEO if (length > bufLength) { - lastErrorMsg = stringPrintf("Target buffer too small (%d) to read whole string (%d) from argument", bufLength, length); + CCustomOpcodeSystem::lastErrorMsg = stringPrintf("Target buffer too small (%d) to read whole string (%d) from argument", bufLength, length); length = bufLength; // clamp to target buffer size } @@ -694,7 +809,7 @@ namespace CLEO // unsupported param type GetScriptParams(thread, 1); // skip unhandled param - lastErrorMsg = stringPrintf("Reading string argument, got %s", ToKindStr(paramType)); + CCustomOpcodeSystem::lastErrorMsg = stringPrintf("Reading string argument, got %s", ToKindStr(paramType)); return nullptr; // error, target buffer untouched } @@ -707,17 +822,17 @@ namespace CLEO bool WriteStringParam(const StringParamBufferInfo& target, const char* str) { - lastErrorMsg.clear(); + CCustomOpcodeSystem::lastErrorMsg.clear(); if (str != nullptr && (size_t)str <= CCustomOpcodeSystem::MinValidAddress) { - lastErrorMsg = stringPrintf("Writing string from invalid '0x%X' pointer", target.data); + CCustomOpcodeSystem::lastErrorMsg = stringPrintf("Writing string from invalid '0x%X' pointer", target.data); return false; } if ((size_t)target.data <= CCustomOpcodeSystem::MinValidAddress) { - lastErrorMsg = stringPrintf("Writing string into invalid '0x%X' pointer argument", target.data); + CCustomOpcodeSystem::lastErrorMsg = stringPrintf("Writing string into invalid '0x%X' 
pointer argument", target.data); return false; } @@ -742,7 +857,7 @@ namespace CLEO StringParamBufferInfo GetStringParamWriteBuffer(CRunningScript* thread) { StringParamBufferInfo result; - lastErrorMsg.clear(); + CCustomOpcodeSystem::lastErrorMsg.clear(); auto paramType = CLEO_GetOperandType(thread); if (IsImmInteger(paramType) || IsVariable(paramType)) @@ -752,7 +867,7 @@ namespace CLEO if (opcodeParams[0].dwParam <= CCustomOpcodeSystem::MinValidAddress) { - lastErrorMsg = stringPrintf("Writing string into invalid '0x%X' pointer argument", opcodeParams[0].dwParam); + CCustomOpcodeSystem::lastErrorMsg = stringPrintf("Writing string into invalid '0x%X' pointer argument", opcodeParams[0].dwParam); return result; // error } @@ -789,7 +904,7 @@ namespace CLEO } } - lastErrorMsg = stringPrintf("Writing string, got argument %s", ToKindStr(paramType)); + CCustomOpcodeSystem::lastErrorMsg = stringPrintf("Writing string, got argument %s", ToKindStr(paramType)); CLEO_SkipOpcodeParams(thread, 1); // skip unhandled param return result; // error } @@ -802,12 +917,12 @@ namespace CLEO char* outIter = outputStr; char bufa[256], fmtbufa[64], *fmta; - lastErrorMsg.clear(); + CCustomOpcodeSystem::lastErrorMsg.clear(); // invalid input arguments if(outputStr == nullptr || len == 0) { - lastErrorMsg = "Need target buffer to read formatted string"; + CCustomOpcodeSystem::lastErrorMsg = "Need target buffer to read formatted string"; SkipUnusedVarArgs(thread); return -1; // error } @@ -892,7 +1007,7 @@ namespace CLEO const char* str = ReadStringParam(thread, bufa, sizeof(bufa)); if(str == nullptr) // read error { - if(lastErrorMsg.find("'null' pointer") != std::string::npos) + if(CCustomOpcodeSystem::lastErrorMsg.find("'null' pointer") != std::string::npos) { static const char none[] = "(null)"; str = none; @@ -971,7 +1086,7 @@ namespace CLEO { _ReadFormattedString_OutOfMemory: // jump here on error - lastErrorMsg = stringPrintf("Target buffer too small (%d) to read whole formatted 
string", len); + CCustomOpcodeSystem::lastErrorMsg = stringPrintf("Target buffer too small (%d) to read whole formatted string", len); SkipUnusedVarArgs(thread); outputStr[len - 1] = '\0'; return -1; // error @@ -980,8 +1095,8 @@ namespace CLEO // still more var-args available if (CLEO_GetOperandType(thread) != DT_END) { - lastErrorMsg = "More params than slots in formatted string"; - LOG_WARNING(thread, "%s in script %s", lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); + CCustomOpcodeSystem::lastErrorMsg = "More params than slots in formatted string"; + LOG_WARNING(thread, "%s in script %s", CCustomOpcodeSystem::lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); } SkipUnusedVarArgs(thread); // skip terminator too @@ -989,7 +1104,7 @@ namespace CLEO return (int)written; _ReadFormattedString_ArgMissing: // jump here on error - lastErrorMsg = "Less params than slots in formatted string"; + CCustomOpcodeSystem::lastErrorMsg = "Less params than slots in formatted string"; thread->IncPtr(); // skip vararg terminator outputStr[written] = '\0'; return -1; // error @@ -1715,258 +1830,25 @@ namespace CLEO //0AA5=-1,call %1d% num_params %2h% pop %3h% OpcodeResult __stdcall opcode_0AA5(CRunningScript *thread) { - static char textParams[5][MAX_STR_LEN]; unsigned currTextParam = 0; - static SCRIPT_VAR arguments[50] = { 0 }; - void(*func)(); *thread >> func; - DWORD numParams; *thread >> numParams; - DWORD stackAlign; *thread >> stackAlign; // pop - - auto nVarArg = GetVarArgCount(thread); - if (numParams != nVarArg) - { - SHOW_ERROR("Opcode [0AA5] declared %d input args, but provided %d in script %s\nScript suspended.", numParams, nVarArg, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); - } - - if (numParams > (sizeof(arguments) / sizeof(SCRIPT_VAR))) numParams = sizeof(arguments) / sizeof(SCRIPT_VAR); - stackAlign *= 4; - SCRIPT_VAR *arguments_end = arguments + numParams; - - 
// retrieve parameters - for (SCRIPT_VAR* arg = arguments; arg != arguments_end; ++arg) - { - auto paramType = (eDataType)*thread->GetBytePointer(); - if (IsImmInteger(paramType) || IsVariable(paramType)) - *thread >> arg->dwParam; - else - if (IsImmFloat(paramType)) - *thread >> arg->fParam; - else - if (IsImmString(paramType)) - (*arg).pcParam = ReadStringParam(thread, textParams[currTextParam++], MAX_STR_LEN); - else - if (IsVarString(paramType)) - arg->pParam = GetScriptParamPointer(thread); // TODO: should use ReadStringParam too to ensure it is null terminated? - else - { - SHOW_ERROR("Invalid param type (%s) in opcode [0AA5] in script %s \nScript suspended.", ToKindStr(paramType), ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); - } - } - - // call function - _asm - { - lea ecx, arguments - loop_0AA5 : - cmp ecx, arguments_end - jae loop_end_0AA5 - push[ecx] - add ecx, 0x4 - jmp loop_0AA5 - loop_end_0AA5 : - xor eax, eax - call func - add esp, stackAlign - } - - SkipUnusedVarArgs(thread); - return OR_CONTINUE; + return CCustomOpcodeSystem::CallFunctionGeneric(0x0AA5, thread, false, false); } //0AA6=-1,call_method %1d% struct %2d% num_params %3h% pop %4h% OpcodeResult __stdcall opcode_0AA6(CRunningScript *thread) { - static char textParams[5][MAX_STR_LEN]; unsigned currTextParam = 0; - static SCRIPT_VAR arguments[50] = { 0 }; - void(*func)(); *thread >> func; - void* struc; *thread >> struc; - DWORD numParams; *thread >> numParams; - DWORD stackAlign; *thread >> stackAlign; // pop - - auto nVarArg = GetVarArgCount(thread); - if (numParams != nVarArg) - { - SHOW_ERROR("Opcode [0AA6] declared %d input args, but provided %d in script %s\nScript suspended.", numParams, nVarArg, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); - } - - if (numParams > (sizeof(arguments) / sizeof(SCRIPT_VAR))) numParams = sizeof(arguments) / sizeof(SCRIPT_VAR); - 
stackAlign *= 4; - SCRIPT_VAR *arguments_end = arguments + numParams; - - // retrieve parameters - for (SCRIPT_VAR* arg = arguments; arg != arguments_end; ++arg) - { - auto paramType = (eDataType)*thread->GetBytePointer(); - if (IsImmInteger(paramType) || IsVariable(paramType)) - *thread >> arg->dwParam; - else - if (IsImmFloat(paramType)) - *thread >> arg->fParam; - else - if (IsImmString(paramType)) - (*arg).pcParam = ReadStringParam(thread, textParams[currTextParam++], MAX_STR_LEN); - else - if (IsVarString(paramType)) - arg->pParam = GetScriptParamPointer(thread); // TODO: should use ReadStringParam too to ensure it is null terminated? - else - { - SHOW_ERROR("Invalid param type (%s) in opcode [0AA6] in script %s \nScript suspended.", ToKindStr(paramType), ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); - } - } - - _asm - { - lea ecx, arguments - loop_0AA6 : - cmp ecx, arguments_end - jae loop_end_0AA6 - push[ecx] - add ecx, 0x4 - jmp loop_0AA6 - loop_end_0AA6 : - mov ecx, struc - xor eax, eax - call func - add esp, stackAlign - } - - SkipUnusedVarArgs(thread); - return OR_CONTINUE; + return CCustomOpcodeSystem::CallFunctionGeneric(0x0AA6, thread, true, false); } //0AA7=-1,call_function_return %1d% num_params %2h% pop %3h% OpcodeResult __stdcall opcode_0AA7(CRunningScript *thread) { - static char textParams[5][MAX_STR_LEN]; DWORD currTextParam = 0; - static SCRIPT_VAR arguments[50] = { 0 }; - void(*func)(); *thread >> func; - DWORD numParams; *thread >> numParams; - DWORD stackAlign; *thread >> stackAlign; // pop - - int nVarArg = GetVarArgCount(thread); - if (numParams + 1 != nVarArg) // and return argument - { - SHOW_ERROR("Opcode [0AA7] declared %d input args, but provided %d in script %s\nScript suspended.", numParams, (int)nVarArg - 1, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); - } - - if (numParams > (sizeof(arguments) / 
sizeof(SCRIPT_VAR))) numParams = sizeof(arguments) / sizeof(SCRIPT_VAR); - stackAlign *= 4; - SCRIPT_VAR * arguments_end = arguments + numParams; - - // retrieve parameters - for (SCRIPT_VAR* arg = arguments; arg != arguments_end; ++arg) - { - auto paramType = (eDataType)*thread->GetBytePointer(); - if (IsImmInteger(paramType) || IsVariable(paramType)) - *thread >> arg->dwParam; - else - if (IsImmFloat(paramType)) - *thread >> arg->fParam; - else - if (IsImmString(paramType)) - (*arg).pcParam = ReadStringParam(thread, textParams[currTextParam++], MAX_STR_LEN); - else - if (IsVarString(paramType)) - arg->pParam = GetScriptParamPointer(thread); // TODO: should use ReadStringParam too to ensure it is null terminated? - else - { - SHOW_ERROR("Invalid param type (%s) in opcode [0AA7] in script %s \nScript suspended.", ToKindStr(paramType), ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); - } - } - - DWORD result; - - _asm - { - lea ecx, arguments - loop_0AA7 : - cmp ecx, arguments_end - jae loop_end_0AA7 - push[ecx] - add ecx, 0x4 - jmp loop_0AA7 - loop_end_0AA7 : - xor eax, eax - call func - mov result, eax - add esp, stackAlign - } - - *thread << result; - SkipUnusedVarArgs(thread); - return OR_CONTINUE; + return CCustomOpcodeSystem::CallFunctionGeneric(0x0AA7, thread, false, true); } //0AA8=-1,call_method_return %1d% struct %2d% num_params %3h% pop %4h% OpcodeResult __stdcall opcode_0AA8(CRunningScript *thread) { - static char textParams[5][MAX_STR_LEN]; DWORD currTextParam = 0; - static SCRIPT_VAR arguments[50] = { 0 }; - void(*func)(); *thread >> func; - void* struc; *thread >> struc; - DWORD numParams; *thread >> numParams; - DWORD stackAlign; *thread >> stackAlign; // pop - - int nVarArg = GetVarArgCount(thread); - if (numParams + 1 != nVarArg) // and return argument - { - SHOW_ERROR("Opcode [0AA8] declared %d input args, but provided %d in script %s\nScript suspended.", numParams, (int)nVarArg - 1, 
((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); - } - - if (numParams > (sizeof(arguments) / sizeof(SCRIPT_VAR))) numParams = sizeof(arguments) / sizeof(SCRIPT_VAR); - stackAlign *= 4; - SCRIPT_VAR *arguments_end = arguments + numParams; - - // retrieve parameters - for (SCRIPT_VAR* arg = arguments; arg != arguments_end; ++arg) - { - auto paramType = (eDataType)*thread->GetBytePointer(); - if (IsImmInteger(paramType) || IsVariable(paramType)) - *thread >> arg->dwParam; - else - if (IsImmFloat(paramType)) - *thread >> arg->fParam; - else - if (IsImmString(paramType)) - (*arg).pcParam = ReadStringParam(thread, textParams[currTextParam++], MAX_STR_LEN); - else - if (IsVarString(paramType)) - arg->pParam = GetScriptParamPointer(thread); // TODO: should use ReadStringParam too to ensure it is null terminated? - else - { - SHOW_ERROR("Invalid param type (%s) in opcode [0AA8] in script %s \nScript suspended.", ToKindStr(paramType), ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); - } - } - - DWORD result; - - _asm - { - lea ecx, arguments - loop_0AA8 : - cmp ecx, arguments_end - jae loop_end_0AA8 - push[ecx] - add ecx, 0x4 - jmp loop_0AA8 - loop_end_0AA8 : - mov ecx, struc - xor eax, eax - call func - mov result, eax - add esp, stackAlign - } - - *thread << result; - SkipUnusedVarArgs(thread); - return OR_CONTINUE; + return CCustomOpcodeSystem::CallFunctionGeneric(0x0AA8, thread, true, true); } //0AA9=0, is_game_version_original @@ -3211,7 +3093,7 @@ extern "C" auto result = ReadStringParam(thread, buf, size); if (result == nullptr) - LOG_WARNING(thread, "%s in script %s", lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); + LOG_WARNING(thread, "%s in script %s", CCustomOpcodeSystem::lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return result; } @@ -3219,7 +3101,7 @@ extern "C" void WINAPI 
CLEO_WriteStringOpcodeParam(CLEO::CRunningScript* thread, const char* str) { if(!WriteStringParam(thread, str)) - LOG_WARNING(thread, "%s in script %s", lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); + LOG_WARNING(thread, "%s in script %s", CCustomOpcodeSystem::lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); } char* WINAPI CLEO_ReadParamsFormatted(CLEO::CRunningScript* thread, const char* format, char* buf, int bufSize) @@ -3230,7 +3112,7 @@ extern "C" if(ReadFormattedString(thread, buf, bufSize, format) == -1) // error? { - LOG_WARNING(thread, "%s in script %s", lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); + LOG_WARNING(thread, "%s in script %s", CCustomOpcodeSystem::lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return nullptr; // error } diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index 8168792f..f8e940c2 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -43,7 +43,8 @@ namespace CLEO static bool RegisterOpcode(WORD opcode, CustomOpcodeHandler callback); - OpcodeResult CleoReturnGeneric(WORD opcode, CRunningScript* thread, bool returnArgs = false, DWORD returnArgCount = 0, bool strictArgCount = true); + static OpcodeResult CallFunctionGeneric(WORD opcode, CRunningScript* thread, bool thisCall, bool returnArg); + static OpcodeResult CleoReturnGeneric(WORD opcode, CRunningScript* thread, bool returnArgs = false, DWORD returnArgCount = 0, bool strictArgCount = true); static OpcodeResult ErrorSuspendScript(CRunningScript* thread); // suspend script execution forever private: @@ -90,4 +91,13 @@ namespace CLEO void SkipUnusedVarArgs(CRunningScript* thread); // for var-args opcodes DWORD GetVarArgCount(CRunningScript* thread); // for var-args opcodes + + inline CRunningScript& operator>>(CRunningScript& thread, DWORD& uval); + inline CRunningScript& operator<<(CRunningScript& thread, DWORD uval); + inline 
CRunningScript& operator>>(CRunningScript& thread, int& nval); + inline CRunningScript& operator<<(CRunningScript& thread, int nval); + inline CRunningScript& operator>>(CRunningScript& thread, float& fval); + inline CRunningScript& operator<<(CRunningScript& thread, float fval); + inline CRunningScript& operator>>(CRunningScript& thread, CVector& vec); + inline CRunningScript& operator<<(CRunningScript& thread, const CVector& vec); } From 5251bbbc25368a73257f424f2070a371646e6787 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Tue, 30 Jan 2024 07:14:33 +0100 Subject: [PATCH 084/216] File related opcodes updates (#57) * File read/write related opcodes moved to FileSystemOperations. New export CLEO_ReadStringParamWriteBuffer * Added Suspend method to CRunningScript * New opcode read_block_from_file * Validation of received file handle in opcodes. Fixed crash in 0AD7 as it was never supported in legacy mode. * Removed no longer used addresses. * Fixed function addresses. * Implemented proper support of read_string_from_file in legacy mode. * Rewritten all FileUtils as class Implemented legacy version for all opcodes. Added test script. * Size param validation for 0AD7 * fixup! Size param validation for 0AD7 * fixup!
Size param validation for 0AD7 --- CHANGELOG.md | 10 +- .../FileSystemOperations.cpp | 317 ++++++++++- .../FileSystemOperations.vcxproj | 5 + .../FileSystemOperations.vcxproj.filters | 5 + .../FileSystemOperations/FileUtils.cpp | 425 ++++++++++++++ cleo_plugins/FileSystemOperations/FileUtils.h | 49 ++ cleo_plugins/FileSystemOperations/Utils.h | 90 +++ cleo_sdk/CLEO.h | 18 +- source/CCustomOpcodeSystem.cpp | 524 ++---------------- source/CCustomOpcodeSystem.h | 6 - source/CGameVersionManager.cpp | 15 - source/CGameVersionManager.h | 15 - source/cleo.def | 1 + tests/test_file_read_write.txt | 205 +++++++ 14 files changed, 1165 insertions(+), 520 deletions(-) create mode 100644 cleo_plugins/FileSystemOperations/FileUtils.cpp create mode 100644 cleo_plugins/FileSystemOperations/FileUtils.h create mode 100644 cleo_plugins/FileSystemOperations/Utils.h create mode 100644 tests/test_file_read_write.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index fbb08539..282a641e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,18 +9,23 @@ - new opcode **2102 ([log_to_file](https://library.sannybuilder.com/#/sa/debug/2102))** - implemented support of opcodes **0662**, **0663** and **0664** (original Rockstar's script debugging opcodes. 
See DebugUtils.ini) - new and updated opcodes + - **0B1E ([sign_extend](https://library.sannybuilder.com/#/sa/bitwise/0B1E))** - **0DD5 ([get_game_platform](https://library.sannybuilder.com/#/sa/CLEO/0DD5))** - **2000 ([resolve_filepath](https://library.sannybuilder.com/#/sa/CLEO/2000))** - **2001 ([get_script_filename](https://library.sannybuilder.com/#/sa/CLEO/2001))** - **2002 ([cleo_return_with](https://library.sannybuilder.com/#/sa/CLEO/2002))** - **2003 ([cleo_return_fail](https://library.sannybuilder.com/#/sa/CLEO/2003))** - **2004 ([forget_memory](https://library.sannybuilder.com/#/sa/CLEO/2004))** + - **2300 ([get_file_position](https://library.sannybuilder.com/#/sa/file/2300))** + - **2301 ([read_block_from_file](https://library.sannybuilder.com/#/sa/file/2301))** + - opcodes **0A9A**, **0A9B**, **0A9C**, **0A9D**, **0A9E**, **0AAB**, **0AD5**, **0AD6**, **0AD7**, **0AD8**, **0AD9**, **0ADA**, **0AE4**, **0AE5**, **0AE6**, **0AE7** and **0AE8** moved to the [FileSystemOperations](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/FileSystemOperations) plugin + - fixed bug preventing file stream opcodes from working correctly for read-write modes + - fixed buffer overflows in file stream read opcodes + - added/fixed support of all file stream opcodes in legacy mode (Cleo3) - 'argument count' parameter of **0AB1 (cleo_call)** is now optional. `cleo_call @LABEL args 0` can be written as `cleo_call @LABEL` - 'argument count' parameter of **0AB2 (cleo_return)** is now optional. 
`cleo_return 0` can be written as `cleo_return` - - opcodes **0AAB**, **0AE4**, **0AE5**, **0AE6**, **0AE7** and **0AE8** moved to the [FileSystemOperations](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/FileSystemOperations) plugin - **cleo_return_\*** opcodes now can pass strings as return arguments - SCM functions **(0AB1)** now keep their own GOSUB's call stack - - new opcode **0B1E ([sign_extend](https://library.sannybuilder.com/#/sa/bitwise/0B1E))** - changes in file operations - file paths can now use 'virtual absolute paths'. Use prefix in file path strings to access predefined locations: - `root:\` for _game root_ directory @@ -49,6 +54,7 @@ - new SDK method: CLEO_GetVarArgCount - new SDK method: CLEO_SkipUnusedVarArgs - new SDK method: CLEO_ReadParamsFormatted +- new SDK method: CLEO_ReadStringParamWriteBuffer - new SDK method: CLEO_GetScriptVersion - new SDK method: CLEO_GetScriptInfoStr - new SDK method: CLEO_ResolvePath diff --git a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp index 45f9395b..62c43901 100644 --- a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp @@ -1,18 +1,30 @@ #include "plugin.h" #include "CLEO.h" +#include "FileUtils.h" +#include "Utils.h" #include using namespace CLEO; using namespace plugin; +#define READ_HANDLE_PARAM() CLEO_GetIntOpcodeParam(thread); \ + if((size_t)handle <= MinValidAddress) \ + { auto info = scriptInfoStr(thread); SHOW_ERROR("Invalid '0x%X' file handle param in script %s \nScript suspended.", handle, info.c_str()); return thread->Suspend(); } \ + else if(m_hFiles.find(handle) == m_hFiles.end()) { auto info = scriptInfoStr(thread); SHOW_ERROR("Invalid or already closed '0x%X' file handle param in script %s \nScript suspended.", handle, info.c_str()); return thread->Suspend(); } + class FileSystemOperations { public: + static std::set m_hFiles; 
static std::set m_hFileSearches; static void WINAPI OnFinalizeScriptObjects() { + // clean up opened files + for (auto handle : m_hFiles) File::close(handle); + m_hFiles.clear(); + // clean up file searches for (auto handle : m_hFileSearches) FindClose(handle); m_hFileSearches.clear(); @@ -23,7 +35,23 @@ class FileSystemOperations auto cleoVer = CLEO_GetVersion(); if (cleoVer >= CLEO_VERSION) { + File::initialize(CLEO_GetGameVersion()); // file utils + //register opcodes + CLEO_RegisterOpcode(0x0A9A, opcode_0A9A); + CLEO_RegisterOpcode(0x0A9B, opcode_0A9B); + CLEO_RegisterOpcode(0x0A9C, opcode_0A9C); + CLEO_RegisterOpcode(0x0A9D, opcode_0A9D); + CLEO_RegisterOpcode(0x0A9E, opcode_0A9E); + CLEO_RegisterOpcode(0x0AD5, opcode_0AD5); + CLEO_RegisterOpcode(0x0AD6, opcode_0AD6); + CLEO_RegisterOpcode(0x0AD7, opcode_0AD7); + CLEO_RegisterOpcode(0x0AD8, opcode_0AD8); + CLEO_RegisterOpcode(0x0AD9, opcode_0AD9); + CLEO_RegisterOpcode(0x0ADA, opcode_0ADA); + CLEO_RegisterOpcode(0x2300, opcode_2300); + CLEO_RegisterOpcode(0x2301, opcode_2301); + CLEO_RegisterOpcode(0x0AAB, Script_FS_FileExists); CLEO_RegisterOpcode(0x0AE4, Script_FS_DirectoryExists); CLEO_RegisterOpcode(0x0AE5, Script_FS_CreateDirectory); @@ -58,6 +86,94 @@ class FileSystemOperations return path; } + //0A9A=3,%3d% = openfile %1d% mode %2d% // IF and SET + static OpcodeResult WINAPI opcode_0A9A(CRunningScript* thread) + { + auto filename = ReadPathParam(thread); + + char mode[16]; + auto paramType = CLEO_GetOperandType(thread); + if (IsImmInteger(paramType) || IsVariable(paramType)) + { + // integer param (for backward compatibility with CLEO 3) + union + { + DWORD uParam; + char strParam[4]; + } param; + param.uParam = CLEO_GetIntOpcodeParam(thread); + strcpy(mode, param.strParam); + } + else + { + CLEO_ReadStringOpcodeParam(thread, mode, sizeof(mode)); + } + + // either CLEO 3 or CLEO 4 made a big mistake! 
(they differ in one major unapparent preference) + // lets try to resolve this with a legacy mode + bool legacy = CLEO_GetScriptVersion(thread) < CLEO_VER_4_3; + + auto handle = File::open(filename.c_str(), mode, legacy); + if (!File::isOk(handle)) + { + CLEO_SetIntOpcodeParam(thread, NULL); + CLEO_SetThreadCondResult(thread, false); + return OR_CONTINUE; + } + + m_hFiles.insert(handle); + CLEO_SetIntOpcodeParam(thread, handle); + CLEO_SetThreadCondResult(thread, true); + return OR_CONTINUE; + } + + //0A9B=1,closefile %1d% + static OpcodeResult WINAPI opcode_0A9B(CRunningScript* thread) + { + DWORD handle = READ_HANDLE_PARAM(); + + if (m_hFiles.find(handle) != m_hFiles.end()) + { + File::close(handle); + m_hFiles.erase(handle); + } + return OR_CONTINUE; + } + + //0A9C=2,%2d% = file %1d% size + static OpcodeResult WINAPI opcode_0A9C(CRunningScript* thread) + { + DWORD handle = READ_HANDLE_PARAM(); + + auto size = File::getSize(handle); + CLEO_SetIntOpcodeParam(thread, size); + return OR_CONTINUE; + } + + //0A9D=3,readfile %1d% size %2d% to %3d% + static OpcodeResult WINAPI opcode_0A9D(CRunningScript* thread) + { + DWORD handle = READ_HANDLE_PARAM(); + DWORD size = CLEO_GetIntOpcodeParam(thread); + SCRIPT_VAR* buffer = CLEO_GetPointerToScriptVariable(thread); + + buffer->dwParam = 0; // https://github.com/cleolibrary/CLEO4/issues/91 + File::read(handle, buffer, size); + return OR_CONTINUE; + } + + //0A9E=3,writefile %1d% size %2d% from %3d% + static OpcodeResult WINAPI opcode_0A9E(CRunningScript* thread) + { + DWORD handle = READ_HANDLE_PARAM(); + DWORD size = CLEO_GetIntOpcodeParam(thread); + SCRIPT_VAR* buffer = CLEO_GetPointerToScriptVariable(thread); + + File::write(handle, buffer, size); + if (File::isOk(handle)) File::flush(handle); + return OR_CONTINUE; + } + // 0AAB=1, file_exists %1s% static OpcodeResult WINAPI Script_FS_FileExists(CRunningScript* thread) { @@ -70,6 +186,181 @@ class FileSystemOperations return OR_CONTINUE; } + //0AD5=3,file %1d% seek %2d% 
from_origin %3d% //IF and SET + static OpcodeResult WINAPI opcode_0AD5(CRunningScript* thread) + { + DWORD handle = READ_HANDLE_PARAM(); + int offset = (int)CLEO_GetIntOpcodeParam(thread); + DWORD origin = CLEO_GetIntOpcodeParam(thread); + + bool ok = File::seek(handle, offset, origin); + CLEO_SetThreadCondResult(thread, ok); + return OR_CONTINUE; + } + + //0AD6=1,end_of_file %1d% reached + static OpcodeResult WINAPI opcode_0AD6(CRunningScript* thread) + { + DWORD handle = READ_HANDLE_PARAM(); + + bool end = !File::isOk(handle) || File::isEndOfFile(handle); + CLEO_SetThreadCondResult(thread, end); + return OR_CONTINUE; + } + + //0AD7=3,read_string_from_file %1d% to %2d% size %3d% //IF and SET + static OpcodeResult WINAPI opcode_0AD7(CRunningScript* thread) + { + DWORD handle = READ_HANDLE_PARAM(); + + char* buffer = nullptr; + int bufferSize = 0; + DWORD needsTerminator = TRUE; + CLEO_ReadStringParamWriteBuffer(thread, &buffer, &bufferSize, &needsTerminator); + + int size = CLEO_GetIntOpcodeParam(thread); + if (size == 0) + { + if (bufferSize > 0) buffer[0] = '\0'; + CLEO_SetThreadCondResult(thread, false); + return OR_CONTINUE; + } + if (size < 0) + { + auto info = scriptInfoStr(thread); + SHOW_ERROR("Invalid size argument (%d) in opcode [0AD7] in script %s\nScript suspended.", size, info.c_str()); + return thread->Suspend(); + } + + std::vector tmpBuff; + tmpBuff.resize(size); + auto data = tmpBuff.data(); + + bool ok = File::readString(handle, data, size) != nullptr; + if(!ok) + { + CLEO_SetThreadCondResult(thread, false); + return OR_CONTINUE; + } + + // copy into result param + int len = strlen(data); + int resultSize = min(len, bufferSize - (int)needsTerminator); + + memcpy(buffer, data, resultSize); + if(resultSize < bufferSize) buffer[resultSize] = '\0'; // terminate string whenever possible + + CLEO_SetThreadCondResult(thread, true); + return OR_CONTINUE; + } + + //0AD8=2,write_string_to_file %1d% from %2d% //IF and SET + static OpcodeResult WINAPI 
opcode_0AD8(CRunningScript* thread)
+    {
+        // Writes the string argument to the file and flushes it; the condition result reports success.
+        DWORD handle = READ_HANDLE_PARAM();
+        auto text = CLEO_ReadStringOpcodeParam(thread);
+
+        auto ok = File::writeString(handle, text);
+        if (!ok)
+        {
+            CLEO_SetThreadCondResult(thread, false);
+            return OR_CONTINUE;
+        }
+
+        File::flush(handle);
+        CLEO_SetThreadCondResult(thread, true);
+        return OR_CONTINUE;
+    }
+
+    //0AD9=-1,write_formated_text %2d% to_file %1d%
+    static OpcodeResult WINAPI opcode_0AD9(CRunningScript* thread)
+    {
+        // Formats the variadic arguments with the given format string and writes the text to the file.
+        DWORD handle = READ_HANDLE_PARAM();
+        auto format = CLEO_ReadStringOpcodeParam(thread);
+        // NOTE(review): the buffer holds 4 * MAX_STR_LEN chars but only MAX_STR_LEN is offered to the formatter - confirm whether sizeof(text) was intended
+        static char text[4 * MAX_STR_LEN]; CLEO_ReadParamsFormatted(thread, format, text, MAX_STR_LEN);
+
+        auto ok = File::writeString(handle, text);
+        if (!ok)
+        {
+            return OR_CONTINUE;
+        }
+
+        File::flush(handle);
+        return OR_CONTINUE;
+    }
+
+    //0ADA=-1,%3d% = scan_file %1d% format %2d% //IF and SET
+    static OpcodeResult WINAPI opcode_0ADA(CRunningScript* thread)
+    {
+        // Scans the file with the given format. The number of values reported by File::scan is
+        // stored in the result variable; the scanned values go to the variadic output variables.
+        DWORD handle = READ_HANDLE_PARAM();
+        auto format = CLEO_ReadStringOpcodeParam(thread);
+        auto result = (DWORD*)CLEO_GetPointerToScriptVariable(thread);
+
+        // File::scan forwards exactly this many pointers to the scanf family
+        constexpr size_t MaxOutputParams = 35;
+
+        size_t paramCount = 0;
+        SCRIPT_VAR* outputParams[MaxOutputParams] = {}; // unused slots stay null instead of holding stack garbage
+        while (CLEO_GetOperandType(thread) != eDataType::DT_END)
+        {
+            // the argument count comes from the script, so it has to be checked against the array capacity
+            if (paramCount >= MaxOutputParams)
+            {
+                auto info = scriptInfoStr(thread);
+                SHOW_ERROR("Too many output arguments (more than %d) in opcode [0ADA] in script %s\nScript suspended.", (int)MaxOutputParams, info.c_str());
+                return thread->Suspend();
+            }
+
+            // TODO: if target param is string variable it should be handled correctly
+            outputParams[paramCount++] = CLEO_GetPointerToScriptVariable(thread);
+        }
+        CLEO_SkipUnusedVarArgs(thread); // var arg terminator
+
+        *result = File::scan(handle, format, (void**)&outputParams);
+
+        //CLEO_SetThreadCondResult(thread, paramCount == *result);
+        CLEO_SetThreadCondResult(thread, true);
+        return OR_CONTINUE;
+    }
+
+    //2300=2,get_file_position %1d% store_to %2d%
+    static OpcodeResult WINAPI opcode_2300(CRunningScript* thread)
+    {
+        // Stores the current position of the file stream in the output variable.
+        DWORD handle = READ_HANDLE_PARAM();
+
+        auto pos = File::getPos(handle);
+        CLEO_SetIntOpcodeParam(thread, pos);
+        return OR_CONTINUE;
+    }
+
+    //2301=3,read_block_from_file %1d% size %2d% buffer %3d% // IF and SET
+    static OpcodeResult WINAPI
opcode_2301(CRunningScript* thread)
+    {
+        // Reads 'size' bytes from the file into the memory address given by the third argument.
+        // The condition result is true only when the whole block was read.
+        DWORD handle = READ_HANDLE_PARAM();
+        // kept signed: with an unsigned type the negative size check below could never trigger
+        int size = (int)CLEO_GetIntOpcodeParam(thread);
+
+        auto paramType = CLEO_GetOperandType(thread);
+        if(!IsImmInteger(paramType) && !IsVariable(paramType))
+        {
+            auto info = scriptInfoStr(thread);
+            SHOW_ERROR("Invalid type (0x%02X) of 'address' argument in opcode [2301] in script %s\nScript suspended.", paramType, info.c_str());
+            return thread->Suspend();
+        }
+        DWORD target = CLEO_GetIntOpcodeParam(thread); OPCODE_VALIDATE_POINTER(target)
+
+        if(size < 0)
+        {
+            auto info = scriptInfoStr(thread);
+            SHOW_ERROR("Invalid size argument (%d) in opcode [2301] in script %s\nScript suspended.", size, info.c_str());
+            return thread->Suspend();
+        }
+
+        if (size == 0)
+        {
+            CLEO_SetThreadCondResult(thread, true); // done
+            return OR_CONTINUE;
+        }
+
+        auto readCount = File::read(handle, (void*)target, (DWORD)size);
+        if (readCount != (DWORD)size)
+        {
+            CLEO_SetThreadCondResult(thread, false);
+            return OR_CONTINUE;
+        }
+
+        CLEO_SetThreadCondResult(thread, true);
+        return OR_CONTINUE;
+    }
+
     // 0AE4=1, directory_exist %1s%
     static OpcodeResult WINAPI Script_FS_DirectoryExists(CRunningScript* thread)
     {
@@ -122,6 +413,15 @@ class FileSystemOperations
     {
         auto handle = (HANDLE)CLEO_GetIntOpcodeParam(thread);
 
+        if (m_hFileSearches.find(handle) == m_hFileSearches.end())
+        {
+            auto info = scriptInfoStr(thread);
+            LOG_WARNING(thread, "[0AE7] used with handle (0x%X) to unknown or already closed file search in script %s", handle, info.c_str());
+            CLEO_SkipOpcodeParams(thread, 1);
+            CLEO_SetThreadCondResult(thread, false);
+            return OR_CONTINUE;
+        }
+
         WIN32_FIND_DATA ffd = { 0 };
         if (FindNextFile(handle, &ffd))
         {
@@ -140,6 +440,14 @@ class FileSystemOperations
     static OpcodeResult WINAPI Script_FS_FindClose(CRunningScript* thread)
     {
         auto handle = (HANDLE)CLEO_GetIntOpcodeParam(thread);
+
+        if (m_hFileSearches.find(handle) == m_hFileSearches.end())
+        {
+            auto info = scriptInfoStr(thread);
+            LOG_WARNING(thread, "[0AE8] used with
handle (0x%X) to unknown or already closed file search in script %s", handle, info.c_str()); + return OR_CONTINUE; + } + FindClose(handle); m_hFileSearches.erase(handle); return OR_CONTINUE; @@ -150,8 +458,8 @@ class FileSystemOperations { auto filename = ReadPathParam(thread); - CLEO_SetThreadCondResult(thread, DeleteFile(filename.c_str())); - + auto success = DeleteFile(filename.c_str()); + CLEO_SetThreadCondResult(thread, success); return OR_CONTINUE; } @@ -209,12 +517,12 @@ class FileSystemOperations BOOL result; if (DeleteAllInsideFlag) { - //remove directory with all files and subdirectories + // remove directory with all files and subdirectories result = DeleteDir(dirpath.c_str()); } else { - //try to remove as empty directory + // try to remove as empty directory result = RemoveDirectory(dirpath.c_str()); } @@ -333,4 +641,5 @@ class FileSystemOperations } } fileSystemOperations; +std::set FileSystemOperations::m_hFiles; std::set FileSystemOperations::m_hFileSearches; diff --git a/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj b/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj index a309c585..a338af16 100644 --- a/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj @@ -111,6 +111,11 @@ xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" + + + + + diff --git a/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj.filters b/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj.filters index 5ba189d8..6e505e7d 100644 --- a/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj.filters +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj.filters @@ -2,5 +2,10 @@ + + + + + \ No newline at end of file diff --git a/cleo_plugins/FileSystemOperations/FileUtils.cpp b/cleo_plugins/FileSystemOperations/FileUtils.cpp new file mode 100644 index 00000000..bb698681 --- /dev/null +++ 
b/cleo_plugins/FileSystemOperations/FileUtils.cpp @@ -0,0 +1,425 @@ +#include "FileUtils.h" +#include + +DWORD File::FUNC_fopen = 0; +DWORD File::FUNC_fclose = 0; +DWORD File::FUNC_fread = 0; +DWORD File::FUNC_fwrite = 0; +DWORD File::FUNC_fgetc = 0; +DWORD File::FUNC_fgets = 0; +DWORD File::FUNC_fputs = 0; +DWORD File::FUNC_fseek = 0; +DWORD File::FUNC_fprintf = 0; +DWORD File::FUNC_ftell = 0; +DWORD File::FUNC_fflush = 0; +DWORD File::FUNC_feof = 0; +DWORD File::FUNC_ferror = 0; + +void File::initialize(CLEO::eGameVersion version) +{ + // GV_US10, GV_US11, GV_EU10, GV_EU11, GV_STEAM + const DWORD MA_FOPEN_FUNCTION[] = { 0x008232D8, 0, 0x00823318, 0x00824098, 0x0085C75E }; + const DWORD MA_FCLOSE_FUNCTION[] = { 0x0082318B, 0, 0x008231CB, 0x00823F4B, 0x0085C396 }; + const DWORD MA_FGETC_FUNCTION[] = { 0x008231DC, 0, 0x0082321C, 0x00823F9C, 0x0085C680 }; + const DWORD MA_FGETS_FUNCTION[] = { 0x00823798, 0, 0x008237D8, 0x00824558, 0x0085D00C }; + const DWORD MA_FPUTS_FUNCTION[] = { 0x008262B8, 0, 0x008262F8, 0x00826BA8, 0x008621F1 }; + const DWORD MA_FREAD_FUNCTION[] = { 0x00823521, 0, 0x00823561, 0x008242E1, 0x0085CD04 }; + const DWORD MA_FWRITE_FUNCTION[] = { 0x00823674, 0, 0x008236B4, 0x00824434, 0x0085CE7E }; + const DWORD MA_FSEEK_FUNCTION[] = { 0x0082374F, 0, 0x0082378F, 0x0082450F, 0x0085CF87 }; + const DWORD MA_FPRINTF_FUNCTION[] = { 0x00823A30, 0, 0x00823A70, 0x008247F0, 0x0085D464 }; + const DWORD MA_FTELL_FUNCTION[] = { 0x00826261, 0, 0x008262A1, 0x00826B51, 0x00862183 }; + const DWORD MA_FFLUSH_FUNCTION[] = { 0x00823E86, 0, 0x00823EC6, 0x00824C46, 0x0085DDDD }; + const DWORD MA_FEOF_FUNCTION[] = { 0x008262A2, 0, 0x008262E2, 0x00826B92, 0x0085D193 }; + const DWORD MA_FERROR_FUNCTION[] = { 0x008262AD, 0, 0x008262ED, 0x00826B9D, 0x0085D1C2 }; + + FUNC_fopen = MA_FOPEN_FUNCTION[version]; + FUNC_fclose = MA_FCLOSE_FUNCTION[version]; + FUNC_fread = MA_FREAD_FUNCTION[version]; + FUNC_fwrite = MA_FWRITE_FUNCTION[version]; + FUNC_fgetc = 
MA_FGETC_FUNCTION[version];
+    FUNC_fgets = MA_FGETS_FUNCTION[version];
+    FUNC_fputs = MA_FPUTS_FUNCTION[version];
+    FUNC_fseek = MA_FSEEK_FUNCTION[version];
+    FUNC_fprintf = MA_FPRINTF_FUNCTION[version];
+    FUNC_ftell = MA_FTELL_FUNCTION[version];
+    FUNC_fflush = MA_FFLUSH_FUNCTION[version];
+    FUNC_feof = MA_FEOF_FUNCTION[version];
+    FUNC_ferror = MA_FERROR_FUNCTION[version];
+}
+
+// A handle with the lowest bit cleared refers to a stream opened in legacy mode
+bool File::isLegacy(DWORD handle) { return (handle & 0x1) == 0; }
+
+// Strips the mode bit from the handle to recover the stream pointer
+FILE* File::handleToFile(DWORD handle) { return (FILE*)(handle & ~0x1); }
+
+// Builds a script handle from a stream pointer; non-legacy streams get the lowest bit set.
+// Returns 0 for a null stream.
+DWORD File::fileToHandle(FILE* file, bool legacy)
+{
+    if (file == nullptr) return 0;
+
+    auto handle = (DWORD)file;
+    if (!legacy) handle |= 0x1;
+    return handle;
+}
+
+// Flushes the stream. Returns true on success.
+bool File::flush(DWORD handle)
+{
+    FILE* file = handleToFile(handle);
+    if (file == nullptr) return false;
+
+    int result = 0;
+    if (isLegacy(handle))
+    {
+        _asm
+        {
+            push file
+            call FUNC_fflush
+            add esp, 0x4
+            mov result, eax
+        }
+    }
+    else
+        result = fflush(file);
+
+    return result == 0;
+}
+
+// Opens the file, through the function at FUNC_fopen in legacy mode or fopen otherwise.
+// Returns 0 when the file could not be opened.
+DWORD File::open(const char* filename, const char* mode, bool legacy)
+{
+    FILE* file = nullptr;
+    if (legacy)
+    {
+        _asm
+        {
+            push mode
+            push filename
+            call FUNC_fopen
+            add esp, 8
+            mov file, eax
+        }
+    }
+    else
+        file = fopen(filename, mode);
+
+    return fileToHandle(file, legacy);
+}
+
+// Closes the stream behind the handle. Does nothing for a null stream.
+void File::close(DWORD handle)
+{
+    FILE* file = handleToFile(handle);
+    if (file == nullptr) return;
+
+    if (isLegacy(handle))
+    {
+        _asm
+        {
+            push file
+            call FUNC_fclose
+            add esp, 4
+        }
+    }
+    else
+        fclose(file);
+}
+
+// Returns true when the handle refers to a stream whose error indicator is not set.
+bool File::isOk(DWORD handle)
+{
+    FILE* file = handleToFile(handle);
+    if (file == nullptr) return false;
+
+    // the legacy branch has to store the returned value as well,
+    // otherwise 'result' keeps its initial 0 and every legacy stream is reported as healthy
+    int result = 0;
+    if (isLegacy(handle))
+    {
+        _asm
+        {
+            push file
+            call FUNC_ferror
+            add esp, 0x4
+            mov result, eax
+        }
+    }
+    else
+        result = ferror(file);
+
+    return result == 0;
+}
+
+// Returns the position of the stream's end; the current position is restored afterwards.
+DWORD File::getSize(DWORD handle)
+{
+    auto pos = getPos(handle);
+    seek(handle, 0, SEEK_END);
+    DWORD size = getPos(handle);
+    seek(handle, pos, SEEK_SET);
+    return size;
+}
+
+bool
File::seek(DWORD handle, int offset, DWORD orign)
+{
+    // Moves the stream position. Returns true on success.
+    FILE* file = handleToFile(handle);
+    if (file == nullptr) return false;
+
+    int result = 0;
+    if (isLegacy(handle))
+    {
+        auto off = offset; // 'offset' is keyword in asm
+        _asm
+        {
+            push orign
+            push off
+            push file
+            call FUNC_fseek
+            add esp, 0xC
+            mov result, eax
+        }
+    }
+    else
+        result = fseek(file, offset, orign);
+
+    return result == 0;
+}
+
+// Returns the current stream position, 0 for a null stream.
+DWORD File::getPos(DWORD handle)
+{
+    FILE* file = handleToFile(handle);
+    if (file == nullptr) return 0;
+
+    DWORD pos = 0;
+    if (isLegacy(handle))
+    {
+        _asm
+        {
+            push file
+            call FUNC_ftell
+            add esp, 0x4
+            mov pos, eax
+        }
+    }
+    else
+        pos = (DWORD)ftell(file);
+
+    return pos;
+}
+
+// Returns true when the stream's end-of-file indicator is set. A null stream counts as ended.
+bool File::isEndOfFile(DWORD handle)
+{
+    FILE* file = handleToFile(handle);
+    if (file == nullptr) return true;
+
+    int result = 0;
+    if (isLegacy(handle))
+    {
+        _asm
+        {
+            push file
+            call FUNC_feof
+            add esp, 0x4
+            mov result, eax
+        }
+    }
+    else
+        result = feof(file);
+
+    return result != 0;
+}
+
+// Reads up to 'size' bytes into 'buffer'. Returns the number of bytes read.
+DWORD File::read(DWORD handle, void* buffer, DWORD size)
+{
+    FILE* file = handleToFile(handle);
+    if (file == nullptr) return 0;
+
+    DWORD read = 0;
+    if (isLegacy(handle))
+    {
+        auto siz = size; // 'size' is keyword in asm
+        _asm
+        {
+            push file
+            push siz
+            push 1
+            push buffer
+            call FUNC_fread
+            add esp, 0x10
+            mov read, eax
+        }
+    }
+    else
+        read = fread(buffer, 1, size, file);
+
+    // Repositioning is required for RW streams (https://en.wikibooks.org/wiki/C_Programming/stdio.h/fopen),
+    // but a successful seek also clears the end-of-file indicator. A read that reached the end of file
+    // needs no repositioning before a write, so skip it and keep the indicator for isEndOfFile()
+    if (!isEndOfFile(handle)) seek(handle, 0, SEEK_CUR);
+
+    return read;
+}
+
+// Reads a single character from the stream, 0 for a null stream.
+char File::readChar(DWORD handle)
+{
+    FILE* file = handleToFile(handle);
+    if (file == nullptr) return 0;
+
+    char result = '_';
+    if (isLegacy(handle))
+    {
+        _asm
+        {
+            push file
+            call FUNC_fgetc
+            add esp, 0x4
+            mov result, al
+        }
+    }
+    else
+        result = fgetc(file);
+
+    // required for RW streams (https://en.wikibooks.org/wiki/C_Programming/stdio.h/fopen);
+    // skipped at the end of file so the end-of-file indicator is not cleared
+    if (!isEndOfFile(handle)) seek(handle, 0, SEEK_CUR);
+
+    return result;
+}
+
+char* File::readString(DWORD handle, char* buffer, DWORD
bufferSize)
+{
+    // Reads one line (at most bufferSize - 1 characters) into 'buffer'.
+    // Returns 'buffer', or nullptr when nothing could be read.
+    FILE* file = handleToFile(handle);
+    if (file == nullptr) return nullptr;
+
+    char* result = nullptr;
+    if (isLegacy(handle))
+    {
+        _asm
+        {
+            push file
+            push bufferSize
+            push buffer
+            call FUNC_fgets
+            add esp, 0xC
+            mov result, eax
+        }
+    }
+    else
+        result = fgets(buffer, bufferSize, file);
+
+    // Repositioning is required for RW streams (https://en.wikibooks.org/wiki/C_Programming/stdio.h/fopen),
+    // but a successful seek also clears the end-of-file indicator. A read that reached the end of file
+    // needs no repositioning before a write, so skip it and keep the indicator for isEndOfFile()
+    if (!isEndOfFile(handle)) seek(handle, 0, SEEK_CUR);
+
+    return result;
+}
+
+// Writes 'size' bytes from 'buffer'. Returns the number of bytes written.
+DWORD File::write(DWORD handle, const void* buffer, DWORD size)
+{
+    FILE* file = handleToFile(handle);
+    if (file == nullptr) return 0;
+
+    DWORD writen = 0;
+    if (isLegacy(handle))
+    {
+        auto siz = size; // 'size' is keyword in asm
+        _asm
+        {
+            push file
+            push siz
+            push 1
+            push buffer
+            call FUNC_fwrite
+            add esp, 0x10
+            mov writen, eax
+        }
+    }
+    else
+        writen = (DWORD)fwrite(buffer, 1, size, file);
+
+    seek(handle, 0, SEEK_CUR); // required for RW streams (https://en.wikibooks.org/wiki/C_Programming/stdio.h/fopen)
+
+    return writen;
+}
+
+// Writes a null-terminated text. Returns true on success.
+bool File::writeString(DWORD handle, const char* text)
+{
+    FILE* file = handleToFile(handle);
+    if (file == nullptr) return false;
+    if (text == nullptr) return false; // fputs must not be given a null string
+
+    int result = 0;
+    if (isLegacy(handle))
+    {
+        _asm
+        {
+            push file
+            push text
+            call FUNC_fputs
+            add esp, 0x8
+            mov result, eax
+        }
+    }
+    else
+        result = fputs(text, file);
+
+    seek(handle, 0, SEEK_CUR); // required for RW streams (https://en.wikibooks.org/wiki/C_Programming/stdio.h/fopen)
+
+    return result >= 0;
+}
+
+DWORD File::scan(DWORD handle, const char* format, void** outputParams)
+{
+    FILE* file = handleToFile(handle);
+    if (file == nullptr) return 0;
+
+    int read = 0;
+    if (isLegacy(handle))
+    {
+        // fscanf not existent in game's code.
Emulate it
+        // The text is read one character at a time and re-scanned with sscanf after every character,
+        // until all conversions matched and one more character no longer changes what was consumed
+
+        constexpr size_t MaxParams = 35; // number of pointers forwarded to sscanf below
+
+        // count the conversions that store a value
+        size_t paramCount = 0;
+        const char* f = format;
+        while(*f != '\0')
+        {
+            if (*f == '%')
+            {
+                if (*(f + 1) == '%')
+                    f++; // skip escaped
+                else if (*(f + 1) != '*') // suppressed conversions ("%*d") do not take an output argument
+                    paramCount++;
+            }
+            f++;
+        }
+
+        // one more slot is needed for the %n appended below
+        if (paramCount >= MaxParams) return 0;
+
+        auto newFormat = std::string(format) + "%n"; // %n returns characters processed by
+        DWORD charRead = 0;
+        outputParams[paramCount] = &charRead;
+
+        DWORD prevCharRead = 0;
+        std::string readText;
+        while(true)
+        {
+            // The end-of-file indicator may already be cleared again by the stream repositioning
+            // done in readChar(), so the end of input is detected by the position not advancing.
+            // Without this check the loop never ends when the file ends before all conversions matched
+            const DWORD posBefore = getPos(handle);
+            const char c = readChar(handle);
+            const bool inputEnded = getPos(handle) == posBefore;
+            if (!inputEnded) readText += c;
+
+            prevCharRead = charRead;
+            auto p = outputParams;
+            read = sscanf(readText.c_str(), newFormat.c_str(),
+                p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], // 10
+                p[10], p[11], p[12], p[13], p[14], p[15], p[16], p[17], p[18], p[19], // 20
+                p[20], p[21], p[22], p[23], p[24], p[25], p[26], p[27], p[28], p[29], // 30
+                p[30], p[31], p[32], p[33], p[34]); // 35
+
+            if (inputEnded || !isOk(handle)) break; // nothing more can be fed into the scan
+
+            if (read == (int)paramCount && charRead == prevCharRead) // all params collected and scan doesn't consume input text anymore
+            {
+                seek(handle, -1, SEEK_CUR); // return the character not used by scan
+                break;
+            }
+        }
+    }
+    else
+    {
+        auto p = outputParams;
+        read = fscanf(file, format,
+            p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], // 10
+            p[10], p[11], p[12], p[13], p[14], p[15], p[16], p[17], p[18], p[19], // 20
+            p[20], p[21], p[22], p[23], p[24], p[25], p[26], p[27], p[28], p[29], // 30
+            p[30], p[31], p[32], p[33], p[34]); // 35
+    }
+
+    // required for RW streams (https://en.wikibooks.org/wiki/C_Programming/stdio.h/fopen);
+    // skipped at the end of file so the end-of-file indicator is not cleared
+    if (!isEndOfFile(handle)) seek(handle, 0, SEEK_CUR);
+
+    return read;
+}
diff --git a/cleo_plugins/FileSystemOperations/FileUtils.h b/cleo_plugins/FileSystemOperations/FileUtils.h
new file mode 100644
index 00000000..31eb52d7
--- /dev/null
+++ b/cleo_plugins/FileSystemOperations/FileUtils.h
@@ -0,0 +1,49 @@
+#pragma once
+#include "CLEO.h"
+#include
+#include
+
+class File
+{
+public:
+    static void initialize(CLEO::eGameVersion version);
+
+    static
DWORD open(const char* filename, const char* mode, bool legacy); + static void close(DWORD handle); + + static bool isOk(DWORD handle); + + static DWORD getSize(DWORD handle); + static bool seek(DWORD handle, int offset, DWORD orign); + static DWORD getPos(DWORD handle); + static bool isEndOfFile(DWORD handle); + + static DWORD read(DWORD handle, void* buffer, DWORD size); + static char readChar(DWORD handle); + static char* readString(DWORD handle, char* buffer, DWORD bufferSize); + + static DWORD write(DWORD handle, const void* buffer, DWORD size); + static bool writeString(DWORD handle, const char* text); + static DWORD scan(DWORD handle, const char* format, void** outputParams); + static bool flush(DWORD handle); + + static bool isLegacy(DWORD handle); // Legacy modes for CLEO 3 + static FILE* handleToFile(DWORD handle); + +private: + static DWORD FUNC_fopen; + static DWORD FUNC_fclose; + static DWORD FUNC_fread; + static DWORD FUNC_fwrite; + static DWORD FUNC_fgetc; + static DWORD FUNC_fgets; + static DWORD FUNC_fputs; + static DWORD FUNC_fseek; + static DWORD FUNC_fprintf; + static DWORD FUNC_ftell; + static DWORD FUNC_fflush; + static DWORD FUNC_feof; + static DWORD FUNC_ferror; + + static DWORD fileToHandle(FILE* file, bool legacy); +}; diff --git a/cleo_plugins/FileSystemOperations/Utils.h b/cleo_plugins/FileSystemOperations/Utils.h new file mode 100644 index 00000000..25b6d76d --- /dev/null +++ b/cleo_plugins/FileSystemOperations/Utils.h @@ -0,0 +1,90 @@ +#pragma once +#include "CLEO.h" +#include +#include + +std::string stringPrintf(const char* format, ...) 
+{
+    va_list args;
+
+    // first pass: measure the formatted text (terminator not counted)
+    va_start(args, format);
+    const int len = std::vsnprintf(nullptr, 0, format, args);
+    va_end(args);
+
+    if (len <= 0) return {}; // formatting error or empty output
+
+    // sized to the text only, so the returned string carries no embedded trailing '\0'
+    std::string result(len, '\0');
+
+    // second pass: the terminator written by vsnprintf lands on the string's own terminator
+    va_start(args, format);
+    std::vsnprintf(result.data(), result.length() + 1, format, args);
+    va_end(args);
+
+    return result;
+}
+
+// Returns the text produced by CLEO_GetScriptInfoStr for the given script
+std::string scriptInfoStr(CLEO::CRunningScript* thread)
+{
+    std::string info(1024, '\0');
+    CLEO_GetScriptInfoStr(thread, true, info.data(), info.length());
+
+    // drop the unused part of the buffer
+    if (auto end = info.find('\0'); end != std::string::npos) info.resize(end);
+    return info;
+}
+
+// Formats the message into a static buffer, passes it to CLEO_Log and returns the buffer.
+// The returned pointer is overwritten by the next call.
+const char* TraceVArg(CLEO::eLogLevel level, const char* format, va_list args)
+{
+    static char szBuf[1024];
+    std::vsnprintf(szBuf, sizeof(szBuf), format, args); // put params into format, truncating text that does not fit
+    CLEO_Log(level, szBuf);
+    return szBuf;
+}
+
+// Logs a formatted message with the given level
+void Trace(CLEO::eLogLevel level, const char* format, ...)
+{
+    va_list args;
+    va_start(args, format);
+    TraceVArg(level, format, args);
+    va_end(args);
+}
+
+// Logs a formatted message, unless the script reports a version older than CLEO 5
+void Trace(const CLEO::CRunningScript* thread, CLEO::eLogLevel level, const char* format, ...)
+{
+    if (CLEO_GetScriptVersion(thread) < CLEO::eCLEO_Version::CLEO_VER_5)
+    {
+        return; // do not log this in older versions
+    }
+
+    va_list args;
+    va_start(args, format);
+    TraceVArg(level, format, args);
+    va_end(args);
+}
+
+// Logs a formatted error message and shows it in a modal message box
+void ShowError(const char* format, ...)
+{
+    va_list args;
+    va_start(args, format);
+    auto msg = TraceVArg(CLEO::eLogLevel::Error, format, args);
+    va_end(args);
+
+    // the state is evaluated only when the query succeeded
+    QUERY_USER_NOTIFICATION_STATE pquns{};
+    bool queried = SUCCEEDED(SHQueryUserNotificationState(&pquns));
+    bool fullscreen = queried && ((pquns == QUNS_BUSY) || (pquns == QUNS_RUNNING_D3D_FULL_SCREEN) || (pquns == QUNS_PRESENTATION_MODE));
+
+    if (fullscreen)
+    {
+        PostMessage(NULL, WM_SYSCOMMAND, SC_MINIMIZE, 0);
+        ShowWindow(NULL, SW_MINIMIZE);
+    }
+
+    MessageBox(NULL, msg, "CLEO error", MB_SYSTEMMODAL | MB_TOPMOST | MB_ICONERROR | MB_OK);
+
+    if (fullscreen)
+    {
+        PostMessage(NULL, WM_SYSCOMMAND, SC_RESTORE, 0);
+        ShowWindow(NULL, SW_RESTORE);
+    }
+}
+
+#define TRACE(format,...) {Trace(CLEO::eLogLevel::Default, format, __VA_ARGS__);}
+#define LOG_WARNING(script, format, ...)
{Trace(script, CLEO::eLogLevel::Error, format, __VA_ARGS__);} +#define SHOW_ERROR(a,...) {ShowError(a, __VA_ARGS__);} + +static const size_t MinValidAddress = 0x10000; // used for validation of pointers received from scripts. First 64kb are for sure reserved by Windows. +#define OPCODE_VALIDATE_POINTER(x) if((size_t)x <= MinValidAddress) { auto info = scriptInfoStr(thread); SHOW_ERROR("Invalid '0x%X' pointer param in script %s \nScript suspended.", x, info.c_str()); return thread->Suspend(); } diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index 653c2823..7344f768 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -269,6 +269,14 @@ enum class eLogLevel : DWORD Default // all log messages }; +enum OpcodeResult : char +{ + OR_NONE = -2, + OR_ERROR = -1, + OR_CONTINUE = 0, + OR_INTERRUPT = 1, +}; + typedef int SCRIPT_HANDLE; typedef SCRIPT_HANDLE HANDLE_ACTOR, ACTOR, HACTOR, PED, HPED, HANDLE_PED; typedef SCRIPT_HANDLE HANDLE_CAR, CAR, HCAR, VEHICLE, HVEHICLE, HANDLE_VEHICLE; @@ -354,6 +362,7 @@ struct CRunningScript CRunningScript* GetPrev() const { return Previous; } void SetIsExternal(bool b) { bIsExternal = b; } void SetActive(bool b) { bIsActive = b; } + OpcodeResult Suspend() { WakeTime = 0xFFFFFFFF; return OpcodeResult::OR_INTERRUPT; } // suspend script execution forever void SetNext(CRunningScript* v) { Next = v; } void SetPrev(CRunningScript* v) { Previous = v; } SCRIPT_VAR* GetVarPtr() { return LocalVar; } @@ -398,14 +407,6 @@ static_assert(sizeof(CRunningScript) == 0xE0, "Invalid size of CRunningScript!") typedef struct CRunningScript CScriptThread; #endif -enum OpcodeResult : char -{ - OR_NONE = -2, - OR_ERROR = -1, - OR_CONTINUE = 0, - OR_INTERRUPT = 1, -}; - typedef OpcodeResult (CALLBACK* _pOpcodeHandler)(CRunningScript*); typedef void(*FuncScriptDeleteDelegateT) (CRunningScript*); @@ -440,6 +441,7 @@ DWORD WINAPI CLEO_GetIntOpcodeParam(CRunningScript* thread); float WINAPI CLEO_GetFloatOpcodeParam(CRunningScript* thread); LPSTR WINAPI 
CLEO_ReadStringOpcodeParam(CRunningScript* thread, char* buf = nullptr, int bufSize = 0); LPSTR WINAPI CLEO_ReadStringPointerOpcodeParam(CRunningScript* thread, char* buf = nullptr, int bufSize = 0); // exactly same as CLEO_ReadStringOpcodeParam +void WINAPI CLEO_ReadStringParamWriteBuffer(CRunningScript* thread, char** outBuf, int* outBufSize, DWORD* outNeedsTerminator); // get info about the string opcode param, so it can be written latter. If outNeedsTerminator is not 0 then whole bufSize can be used as text characters. Advances script to next param char* WINAPI CLEO_ReadParamsFormatted(CRunningScript* thread, const char* format, char* buf = nullptr, int bufSize = 0); // consumes all var-arg params and terminator // param skip without reading diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 33c22058..2f4af817 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -11,9 +11,9 @@ #include #include -#define OPCODE_VALIDATE_STR_ARG_READ(x) if((void*)x == nullptr) { SHOW_ERROR("%s in script %s \nScript suspended.", CCustomOpcodeSystem::lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } -#define OPCODE_VALIDATE_STR_ARG_WRITE(x) if((void*)x == nullptr) { SHOW_ERROR("%s in script %s \nScript suspended.", CCustomOpcodeSystem::lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } -#define OPCODE_READ_FORMATTED_STRING(thread, buf, bufSize, format) if(ReadFormattedString(thread, buf, bufSize, format) == -1) { SHOW_ERROR("%s in script %s \nScript suspended.", CCustomOpcodeSystem::lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return CCustomOpcodeSystem::ErrorSuspendScript(thread); } +#define OPCODE_VALIDATE_STR_ARG_READ(x) if((void*)x == nullptr) { SHOW_ERROR("%s in script %s \nScript suspended.", CCustomOpcodeSystem::lastErrorMsg.c_str(), 
((CCustomScript*)thread)->GetInfoStr().c_str()); return thread->Suspend(); } +#define OPCODE_VALIDATE_STR_ARG_WRITE(x) if((void*)x == nullptr) { SHOW_ERROR("%s in script %s \nScript suspended.", CCustomOpcodeSystem::lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return thread->Suspend(); } +#define OPCODE_READ_FORMATTED_STRING(thread, buf, bufSize, format) if(ReadFormattedString(thread, buf, bufSize, format) == -1) { SHOW_ERROR("%s in script %s \nScript suspended.", CCustomOpcodeSystem::lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return thread->Suspend(); } namespace CLEO { @@ -22,20 +22,6 @@ namespace CLEO template inline CRunningScript& operator<<(CRunningScript& thread, memory_pointer pval); template inline CRunningScript& operator>>(CRunningScript& thread, memory_pointer& pval); - DWORD FUNC_fopen; - DWORD FUNC_fclose; - DWORD FUNC_fwrite; - DWORD FUNC_fread; - DWORD FUNC_fgetc; - DWORD FUNC_fgets; - DWORD FUNC_fputs; - DWORD FUNC_fseek; - DWORD FUNC_fprintf; - DWORD FUNC_ftell; - DWORD FUNC_fflush; - DWORD FUNC_feof; - DWORD FUNC_ferror; - OpcodeResult __stdcall opcode_0A8C(CRunningScript *thread); OpcodeResult __stdcall opcode_0A8D(CRunningScript *thread); OpcodeResult __stdcall opcode_0A8E(CRunningScript *thread); @@ -50,11 +36,6 @@ namespace CLEO OpcodeResult __stdcall opcode_0A97(CRunningScript *thread); OpcodeResult __stdcall opcode_0A98(CRunningScript *thread); OpcodeResult __stdcall opcode_0A99(CRunningScript *thread); - OpcodeResult __stdcall opcode_0A9A(CRunningScript *thread); - OpcodeResult __stdcall opcode_0A9B(CRunningScript *thread); - OpcodeResult __stdcall opcode_0A9C(CRunningScript *thread); - OpcodeResult __stdcall opcode_0A9D(CRunningScript *thread); - OpcodeResult __stdcall opcode_0A9E(CRunningScript *thread); OpcodeResult __stdcall opcode_0A9F(CRunningScript *thread); OpcodeResult __stdcall opcode_0AA0(CRunningScript *thread); OpcodeResult __stdcall opcode_0AA1(CRunningScript *thread); @@ 
-108,12 +89,6 @@ namespace CLEO OpcodeResult __stdcall opcode_0AD2(CRunningScript *thread); OpcodeResult __stdcall opcode_0AD3(CRunningScript *thread); OpcodeResult __stdcall opcode_0AD4(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AD5(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AD6(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AD7(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AD8(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AD9(CRunningScript *thread); - OpcodeResult __stdcall opcode_0ADA(CRunningScript *thread); OpcodeResult __stdcall opcode_0ADB(CRunningScript *thread); OpcodeResult __stdcall opcode_0ADC(CRunningScript *thread); OpcodeResult __stdcall opcode_0ADD(CRunningScript *thread); @@ -229,7 +204,7 @@ namespace CLEO if(opcode > LastCustomOpcode) { SHOW_ERROR("Opcode [%04X] out of supported range! \nCalled in script %s\nScript suspended.", opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return ErrorSuspendScript(thread); + return thread->Suspend(); } CustomOpcodeHandler handler = customOpcodeProc[opcode]; @@ -244,7 +219,7 @@ namespace CLEO if (opcode > LastOriginalOpcode) { SHOW_ERROR("Opcode [%04X] not registered! \nCalled in script %s\nPreviously called opcode: [%04X]\nScript suspended.", opcode, ((CCustomScript*)thread)->GetInfoStr().c_str(), prevOpcode); - return ErrorSuspendScript(thread); + return thread->Suspend(); } size_t tableIdx = opcode / 100; // 100 opcodes peer handler table @@ -253,7 +228,7 @@ namespace CLEO if(result == OR_ERROR) { SHOW_ERROR("Opcode [%04X] not found! 
\nCalled in script %s\nScript suspended.", opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return ErrorSuspendScript(thread); + return thread->Suspend(); } } @@ -280,27 +255,27 @@ namespace CLEO if ((size_t)func <= CCustomOpcodeSystem::MinValidAddress) { SHOW_ERROR("Invalid '0x%X' function pointer param of opcode [%04X] in script %s\nScript suspended.", func, opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); + return thread->Suspend(); } if (thisCall && (size_t)struc <= CCustomOpcodeSystem::MinValidAddress) { SHOW_ERROR("Invalid '0x%X' struct pointer param of opcode [%04X] in script %s\nScript suspended.", struc, opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); + return thread->Suspend(); } int nVarArg = GetVarArgCount(thread); if (numParams + returnArg != nVarArg) // and return argument { SHOW_ERROR("Opcode [%04X] declared %d input args, but provided %d in script %s\nScript suspended.", opcode, numParams, (int)nVarArg - returnArg, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); + return thread->Suspend(); } constexpr size_t Max_Args = 32; if (numParams > Max_Args) { SHOW_ERROR("Opcode [%04X] used with more than supported arguments in script %s\nScript suspended.", opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); + return thread->Suspend(); } static SCRIPT_VAR arguments[Max_Args] = { 0 }; @@ -329,7 +304,7 @@ namespace CLEO if (currTextParam >= Max_Text_Params) { SHOW_ERROR("Opcode [%04X] used with more than supported string arguments in script %s\nScript suspended.", opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); + return thread->Suspend(); } param.pcParam = ReadStringParam(thread, textParams[currTextParam], MAX_STR_LEN); 
OPCODE_VALIDATE_STR_ARG_READ(param.pcParam) @@ -338,7 +313,7 @@ namespace CLEO else { SHOW_ERROR("Invalid param type (%s) in opcode [%04X] in script %s \nScript suspended.", opcode, ToKindStr(paramType), ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); + return thread->Suspend(); } } @@ -350,7 +325,7 @@ namespace CLEO if (!IsVariable(paramType) && !IsVarString(paramType)) { SHOW_ERROR("Invalid return param type (%s) in opcode [%04X] in script %s \nScript suspended.", opcode, ToKindStr(paramType), ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); + return thread->Suspend(); } } @@ -381,17 +356,10 @@ namespace CLEO return OR_CONTINUE; } - OpcodeResult CCustomOpcodeSystem::ErrorSuspendScript(CRunningScript* thread) - { - //thread->SetActive(false): // will crash game if no active script left - ((CCustomScript*)thread)->WakeTime = 0xFFFFFFFF; - return OpcodeResult::OR_INTERRUPT; - } - void CCustomOpcodeSystem::FinalizeScriptObjects() { - TRACE("Cleaning up script data... %u files, %u libs, %u allocations...", - m_hFiles.size(), m_hNativeLibs.size(), m_pAllocations.size() + TRACE("Cleaning up script data... 
%u libs, %u allocations...", + m_hNativeLibs.size(), m_pAllocations.size() ); for (void* func : GetInstance().GetCallbacks(eCallbackId::ScriptsFinalize)) @@ -400,14 +368,6 @@ namespace CLEO ((callback*)func)(); } - // clean up after opcode_0A9A - for (auto i = m_hFiles.begin(); i != m_hFiles.end(); ++i) - { - if (!is_legacy_handle(*i)) - fclose(convert_handle_to_file(*i)); - } - m_hFiles.clear(); - // clean up after opcode_0AA2 std::for_each(m_hNativeLibs.begin(), m_hNativeLibs.end(), FreeLibrary); m_hNativeLibs.clear(); @@ -437,11 +397,6 @@ namespace CLEO CLEO_RegisterOpcode(0x0A97, opcode_0A97); CLEO_RegisterOpcode(0x0A98, opcode_0A98); CLEO_RegisterOpcode(0x0A99, opcode_0A99); - CLEO_RegisterOpcode(0x0A9A, opcode_0A9A); - CLEO_RegisterOpcode(0x0A9B, opcode_0A9B); - CLEO_RegisterOpcode(0x0A9C, opcode_0A9C); - CLEO_RegisterOpcode(0x0A9D, opcode_0A9D); - CLEO_RegisterOpcode(0x0A9E, opcode_0A9E); CLEO_RegisterOpcode(0x0A9F, opcode_0A9F); CLEO_RegisterOpcode(0x0AA0, opcode_0AA0); CLEO_RegisterOpcode(0x0AA1, opcode_0AA1); @@ -495,12 +450,6 @@ namespace CLEO CLEO_RegisterOpcode(0x0AD2, opcode_0AD2); CLEO_RegisterOpcode(0x0AD3, opcode_0AD3); CLEO_RegisterOpcode(0x0AD4, opcode_0AD4); - CLEO_RegisterOpcode(0x0AD5, opcode_0AD5); - CLEO_RegisterOpcode(0x0AD6, opcode_0AD6); - CLEO_RegisterOpcode(0x0AD7, opcode_0AD7); - CLEO_RegisterOpcode(0x0AD8, opcode_0AD8); - CLEO_RegisterOpcode(0x0AD9, opcode_0AD9); - CLEO_RegisterOpcode(0x0ADA, opcode_0ADA); CLEO_RegisterOpcode(0x0ADB, opcode_0ADB); CLEO_RegisterOpcode(0x0ADC, opcode_0ADC); CLEO_RegisterOpcode(0x0ADD, opcode_0ADD); @@ -547,20 +496,6 @@ namespace CLEO MemWrite(gvm.TranslateMemoryAddress(MA_OPCODE_HANDLER_REF), &customOpcodeHandlers); MemWrite(0x00469EF0, &customOpcodeHandlers); // TODO: game version translation - FUNC_fopen = gvm.TranslateMemoryAddress(MA_FOPEN_FUNCTION); - FUNC_fclose = gvm.TranslateMemoryAddress(MA_FCLOSE_FUNCTION); - FUNC_fread = gvm.TranslateMemoryAddress(MA_FREAD_FUNCTION); - FUNC_fwrite = 
gvm.TranslateMemoryAddress(MA_FWRITE_FUNCTION); - FUNC_fgetc = gvm.TranslateMemoryAddress(MA_FGETC_FUNCTION); - FUNC_fgets = gvm.TranslateMemoryAddress(MA_FGETS_FUNCTION); - FUNC_fputs = gvm.TranslateMemoryAddress(MA_FPUTS_FUNCTION); - FUNC_fseek = gvm.TranslateMemoryAddress(MA_FSEEK_FUNCTION); - FUNC_fprintf = gvm.TranslateMemoryAddress(MA_FPRINTF_FUNCTION); - FUNC_ftell = gvm.TranslateMemoryAddress(MA_FTELL_FUNCTION); - FUNC_fflush = gvm.TranslateMemoryAddress(MA_FFLUSH_FUNCTION); - FUNC_feof = gvm.TranslateMemoryAddress(MA_FEOF_FUNCTION); - FUNC_ferror = gvm.TranslateMemoryAddress(MA_FERROR_FUNCTION); - pedPool = gvm.TranslateMemoryAddress(MA_PED_POOL); vehiclePool = gvm.TranslateMemoryAddress(MA_VEHICLE_POOL); objectPool = gvm.TranslateMemoryAddress(MA_OBJECT_POOL); @@ -1118,7 +1053,7 @@ namespace CLEO if (scmFunc == nullptr) { SHOW_ERROR("Invalid Cleo Call reference. [%04X] possibly used without preceding [0AB1] in script %s\nScript suspended.", opcode, cs->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); + return thread->Suspend(); } // store return arguments @@ -1130,14 +1065,14 @@ namespace CLEO if (returnArgCount > 32) { SHOW_ERROR("Opcode [%04X] has too many (%d) args in script %s\nScript suspended.", opcode, returnArgCount, cs->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); + return thread->Suspend(); } auto nVarArg = GetVarArgCount(thread); if (returnArgCount > nVarArg) { SHOW_ERROR("Opcode [%04X] declared %d args, but %d was provided in script %s\nScript suspended.", opcode, returnArgCount, nVarArg, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); + return thread->Suspend(); } for (DWORD i = 0; i < returnArgCount; i++) @@ -1165,7 +1100,7 @@ namespace CLEO else { SHOW_ERROR("Invalid argument type '0x%02X' in opcode [%04X] in script %s\nScript suspended.", paramType, opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return 
CCustomOpcodeSystem::ErrorSuspendScript(thread); + return thread->Suspend(); } } } @@ -1181,7 +1116,7 @@ namespace CLEO if (returnSlotCount > returnArgCount || (strictArgCount && returnSlotCount < returnArgCount)) { SHOW_ERROR("Opcode [%04X] returned %d params, while function caller expected %d in script %s\nScript suspended.", opcode, returnArgCount, returnSlotCount, cs->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(cs); + return cs->Suspend(); } else if (returnSlotCount < returnArgCount) { @@ -1210,7 +1145,7 @@ namespace CLEO else { SHOW_ERROR("Invalid output argument type '0x%02X' in opcode [%04X] in script %s\nScript suspended.", paramType, opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); + return thread->Suspend(); } } } @@ -1220,176 +1155,6 @@ namespace CLEO return OR_CONTINUE; } - // Legacy modes for CLEO 3 - FILE* legacy_fopen(const char* szPath, const char* szMode) - { - FILE* hFile; - _asm - { - push szMode - push szPath - call FUNC_fopen - add esp, 8 - mov hFile, eax - } - return hFile; - } - void legacy_fclose(FILE * hFile) - { - _asm - { - push hFile - call FUNC_fclose - add esp, 4 - } - } - size_t legacy_fread(void * buf, size_t len, size_t count, FILE * stream) - { - _asm - { - push stream - push count - push len - push buf - call FUNC_fread - add esp, 0x10 - } - } - size_t legacy_fwrite(const void * buf, size_t len, size_t count, FILE * stream) - { - _asm - { - push stream - push count - push len - push buf - call FUNC_fwrite - add esp, 0x10 - } - } - char legacy_fgetc(FILE * stream) - { - _asm - { - push stream - call FUNC_fgetc - add esp, 0x4 - } - } - char * legacy_fgets(char *pStr, int num, FILE * stream) - { - _asm - { - push stream - push num - push pStr - call FUNC_fgets - add esp, 0xC - } - } - int legacy_fputs(const char *pStr, FILE * stream) - { - _asm - { - push stream - push pStr - call FUNC_fputs - add esp, 0x8 - } - } - int legacy_fseek(FILE * 
stream, long int offs, int original) - { - _asm - { - push stream - push offs - push original - call FUNC_fseek - add esp, 0xC - } - } - int legacy_ftell(FILE * stream) - { - _asm - { - push stream - call FUNC_ftell - add esp, 0x4 - } - } - int __declspec(naked) fprintf(FILE * stream, const char * format, ...) - { - _asm jmp FUNC_fprintf - } - int legacy_fflush(FILE * stream) - { - _asm - { - push stream - call FUNC_fflush - add esp, 0x4 - } - } - int legacy_feof(FILE * stream) - { - _asm - { - push stream - call FUNC_feof - add esp, 0x4 - } - } - int legacy_ferror(FILE * stream) - { - _asm - { - push stream - call FUNC_ferror - add esp, 0x4 - } - } - - bool is_legacy_handle(DWORD dwHandle) { return (dwHandle & 0x1) == 0; } - FILE * convert_handle_to_file(DWORD dwHandle) { return dwHandle ? reinterpret_cast(is_legacy_handle(dwHandle) ? dwHandle : dwHandle & ~(0x1)) : nullptr; } - - inline DWORD open_file(const char * szPath, const char * szMode, bool bLegacy) - { - FILE * hFile = bLegacy ? legacy_fopen(szPath, szMode) : fopen(szPath, szMode); - if (hFile) return bLegacy ? (DWORD)hFile : (DWORD)hFile | 0x1; - return NULL; - } - inline void close_file(DWORD dwHandle) - { - if (is_legacy_handle(dwHandle)) legacy_fclose(convert_handle_to_file(dwHandle)); - else fclose(convert_handle_to_file(dwHandle)); - } - inline DWORD file_get_size(DWORD file_handle) - { - FILE * hFile = convert_handle_to_file(file_handle); - if (hFile) - { - auto savedPos = ftell(hFile); - fseek(hFile, 0, SEEK_END); - DWORD dwSize = static_cast(ftell(hFile)); - fseek(hFile, savedPos, SEEK_SET); - return dwSize; - } - return 0; - } - inline DWORD read_file(void *buf, DWORD size, DWORD count, DWORD hFile) - { - return is_legacy_handle(hFile) ? legacy_fread(buf, size, 1, convert_handle_to_file(hFile)) : fread(buf, size, 1, convert_handle_to_file(hFile)); - } - inline DWORD write_file(const void *buf, DWORD size, DWORD count, DWORD hFile) - { - return is_legacy_handle(hFile) ? 
legacy_fwrite(buf, size, 1, convert_handle_to_file(hFile)) : fwrite(buf, size, 1, convert_handle_to_file(hFile)); - } - inline void flush_file(DWORD dwHandle) - { - if (is_legacy_handle(dwHandle)) legacy_fflush(convert_handle_to_file(dwHandle)); - else fflush(convert_handle_to_file(dwHandle)); - } - inline void ThreadJump(CRunningScript *thread, int off) { thread->SetIp(off < 0 ? thread->GetBasePointer() - off : scmBlock + off); @@ -1434,7 +1199,7 @@ namespace CLEO if ((size_t)address <= CCustomOpcodeSystem::MinValidAddress) { SHOW_ERROR("Invalid '0x%X' pointer param of opcode [0A8C] in script %s\nScript suspended.", address, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); + return thread->Suspend(); } switch (size) @@ -1463,7 +1228,7 @@ namespace CLEO if ((size_t)address <= CCustomOpcodeSystem::MinValidAddress) { SHOW_ERROR("Invalid '0x%X' pointer param of opcode [0A8D] in script %s\nScript suspended.", address, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); + return thread->Suspend(); } opcodeParams[0].dwParam = 0; @@ -1480,7 +1245,7 @@ namespace CLEO break; default: SHOW_ERROR("Invalid size param '%d' of opcode [0A8D] in script %s\nScript suspended.", size, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); + return thread->Suspend(); } SetScriptParams(thread, 1); @@ -1664,105 +1429,6 @@ namespace CLEO return OR_CONTINUE; } - //0A9A=3,%3d% = openfile %1d% mode %2d% // IF and SET - OpcodeResult __stdcall opcode_0A9A(CRunningScript *thread) - { - auto path = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(path) - - auto filename = reinterpret_cast(thread)->ResolvePath(path); - auto paramType = *thread->GetBytePointer(); - char mode[0x10]; - - // either CLEO 3 or CLEO 4 made a big mistake! 
(they differ in one major unapparent preference) - // lets try to resolve this with a legacy mode - auto cs = (CCustomScript*)thread; - bool bLegacyMode = cs->IsCustom() && cs->GetCompatibility() < CLEO_VER_4_3; - - if (paramType >= 1 && paramType <= 8) - { - // integer param (for backward compatibility with CLEO 3) - union - { - DWORD uParam; - char strParam[4]; - } param; - *thread >> param.uParam; - strcpy(mode, param.strParam); - } - else - { - auto modeOk = ReadStringParam(thread, mode, sizeof(mode)); - OPCODE_VALIDATE_STR_ARG_READ(modeOk) - } - - if (auto hfile = open_file(filename.c_str(), mode, bLegacyMode)) - { - GetInstance().OpcodeSystem.m_hFiles.insert(hfile); - - *thread << hfile; - SetScriptCondResult(thread, true); - } - else - { - *thread << NULL; - SetScriptCondResult(thread, false); - } - - return OR_CONTINUE; - } - - //0A9B=1,closefile %1d% - OpcodeResult __stdcall opcode_0A9B(CRunningScript *thread) - { - DWORD hFile; - *thread >> hFile; - if (convert_handle_to_file(hFile)) - { - close_file(hFile); - GetInstance().OpcodeSystem.m_hFiles.erase(hFile); - } - return OR_CONTINUE; - } - - //0A9C=2,%2d% = file %1d% size - OpcodeResult __stdcall opcode_0A9C(CRunningScript *thread) - { - DWORD hFile; - *thread >> hFile; - if (convert_handle_to_file(hFile)) *thread << file_get_size(hFile); - return OR_CONTINUE; - } - - //0A9D=3,readfile %1d% size %2d% to %3d% - OpcodeResult __stdcall opcode_0A9D(CRunningScript *thread) - { - DWORD hFile; - DWORD size; - *thread >> hFile >> size; - - SCRIPT_VAR* buf = GetScriptParamPointer(thread); - buf->dwParam = 0; // https://github.com/cleolibrary/CLEO4/issues/91 - - if (convert_handle_to_file(hFile)) read_file(buf, size, 1, hFile); - return OR_CONTINUE; - } - - //0A9E=3,writefile %1d% size %2d% from %3d% - OpcodeResult __stdcall opcode_0A9E(CRunningScript *thread) - { - DWORD hFile; - DWORD size; - const void *buf; - *thread >> hFile >> size; - buf = GetScriptParamPointer(thread); - if (convert_handle_to_file(hFile)) 
- { - write_file(buf, size, 1, hFile); - flush_file(hFile); - } - return OR_CONTINUE; - } - //0A9F=1,%1d% = current_thread_pointer OpcodeResult __stdcall opcode_0A9F(CRunningScript *thread) { @@ -1951,7 +1617,7 @@ namespace CLEO else { SHOW_ERROR("Invalid type (%s) of the 'input param count' argument in opcode [0AB1] in script %s \nScript suspended.", ToKindStr(paramType), ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); + return thread->Suspend(); } ScmFunction* scmFunc = new ScmFunction(thread); @@ -1964,7 +1630,7 @@ namespace CLEO if (pos == str.npos) { SHOW_ERROR("Invalid module reference '%s' in opcode [0AB1] in script %s \nScript suspended.", moduleTxt, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); + return thread->Suspend(); } std::string_view strExport = str.substr(0, pos); std::string_view strModule = str.substr(pos + 1); @@ -1978,7 +1644,7 @@ namespace CLEO if (!scriptRef.Valid()) { SHOW_ERROR("Not found module '%s' export '%s', requested by opcode [0AB1] in script %s", modulePath.c_str(), &str[0], ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); + return thread->Suspend(); } scmFunc->moduleExportRef = scriptRef.base; // to be released on return @@ -2000,7 +1666,7 @@ namespace CLEO else { SHOW_ERROR("Invalid type of first argument in opcode [0AB1], in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); + return thread->Suspend(); } } if (nParams) @@ -2009,13 +1675,13 @@ namespace CLEO if (nParams > nVarArg) // if less it means there are return params too { SHOW_ERROR("Opcode [0AB1] declared %d input args, but provided %d in script %s\nScript suspended.", nParams, nVarArg, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); + return thread->Suspend(); } if 
(nParams > 32) { SHOW_ERROR("Argument count %d is out of supported range (32) of opcode [0AB1] in script %s", nParams, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); + return thread->Suspend(); } } @@ -2055,7 +1721,7 @@ namespace CLEO else { SHOW_ERROR("Invalid argument type '0x%02X' in opcode [0AB1] in script %s\nScript suspended.", paramType, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); + return thread->Suspend(); } } @@ -2094,14 +1760,14 @@ namespace CLEO if (!IsImmInteger(paramType)) { SHOW_ERROR("Invalid type of first argument in opcode [0AB2], in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); + return thread->Suspend(); } DWORD declaredParamCount; *thread >> declaredParamCount; if (returnParamCount - 1 < declaredParamCount) // minus 'num args' itself { SHOW_ERROR("Opcode [0AB2] declared %d return args, but provided %d in script %s\nScript suspended.", declaredParamCount, returnParamCount - 1, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); + return thread->Suspend(); } else if (returnParamCount - 1 > declaredParamCount) // more args than needed, not critical { @@ -2382,7 +2048,7 @@ namespace CLEO if ((size_t)mem <= CCustomOpcodeSystem::MinValidAddress) { SHOW_ERROR("[0AC9] used with invalid '0x%X' pointer argument in script %s\nScript suspended.", mem, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); + return thread->Suspend(); } // allocated with 0AC8 @@ -2548,105 +2214,6 @@ namespace CLEO return OR_CONTINUE; } - //0AD5=3,file %1d% seek %2d% from_origin %3d% //IF and SET - OpcodeResult __stdcall opcode_0AD5(CRunningScript *thread) - { - DWORD hFile; - int seek, origin; - *thread >> hFile >> seek >> origin; - if (convert_handle_to_file(hFile)) 
SetScriptCondResult(thread, fseek(convert_handle_to_file(hFile), seek, origin) == 0); - else SetScriptCondResult(thread, false); - return OR_CONTINUE; - } - - //0AD6=1,end_of_file %1d% reached - OpcodeResult __stdcall opcode_0AD6(CRunningScript *thread) - { - DWORD hFile; - *thread >> hFile; - if (FILE *file = convert_handle_to_file(hFile)) - SetScriptCondResult(thread, ferror(file) || feof(file) != 0); - else - SetScriptCondResult(thread, true); - return OR_CONTINUE; - } - - //0AD7=3,read_string_from_file %1d% to %2d% size %3d% //IF and SET - OpcodeResult __stdcall opcode_0AD7(CRunningScript *thread) - { - DWORD hFile; - char *buf; - DWORD size; - *thread >> hFile; - if (*thread->GetBytePointer() >= 1 && *thread->GetBytePointer() <= 8) *thread >> buf; - else buf = (char *)GetScriptParamPointer(thread); - *thread >> size; - if (convert_handle_to_file(hFile)) SetScriptCondResult(thread, fgets(buf, size, convert_handle_to_file(hFile)) == buf); - else SetScriptCondResult(thread, false); - return OR_CONTINUE; - } - - //0AD8=2,write_string_to_file %1d% from %2d% //IF and SET - OpcodeResult __stdcall opcode_0AD8(CRunningScript *thread) - { - DWORD hFile; *thread >> hFile; - auto text = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(text) - - if (FILE * file = convert_handle_to_file(hFile)) - { - SetScriptCondResult(thread, fputs(text, file) > 0); - fflush(file); - } - else - { - SetScriptCondResult(thread, false); - } - return OR_CONTINUE; - } - - //0AD9=-1,write_formated_text %2d% to_file %1d% - OpcodeResult __stdcall opcode_0AD9(CRunningScript *thread) - { - DWORD hFile; *thread >> hFile; - auto format = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(format) - char text[MAX_STR_LEN]; OPCODE_READ_FORMATTED_STRING(thread, text, sizeof(text), format) - - if (FILE * file = convert_handle_to_file(hFile)) - { - fputs(text, file); - fflush(file); - } - return OR_CONTINUE; - } - - //0ADA=-1,%3d% = scan_file %1d% format %2d% //IF and SET - OpcodeResult __stdcall 
opcode_0ADA(CRunningScript *thread) - { - DWORD hFile; *thread >> hFile; - auto format = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(format) - int *result = (int *)GetScriptParamPointer(thread); - - size_t cExParams = 0; - SCRIPT_VAR *ExParams[35]; - // read extra params - while (*thread->GetBytePointer()) ExParams[cExParams++] = GetScriptParamPointer(thread); - thread->IncPtr(); - - if (FILE *file = convert_handle_to_file(hFile)) - { - *result = fscanf(file, format, - /* extra parameters (will be aligned automatically, but the limit of 35 elements maximum exists) */ - ExParams[0], ExParams[1], ExParams[2], ExParams[3], ExParams[4], ExParams[5], - ExParams[6], ExParams[7], ExParams[8], ExParams[9], ExParams[10], ExParams[11], - ExParams[12], ExParams[13], ExParams[14], ExParams[15], ExParams[16], ExParams[17], - ExParams[18], ExParams[19], ExParams[20], ExParams[21], ExParams[22], ExParams[23], - ExParams[24], ExParams[25], ExParams[26], ExParams[27], ExParams[28], ExParams[29], - ExParams[30], ExParams[31], ExParams[32], ExParams[33], ExParams[34]); - } - SetScriptCondResult(thread, cExParams == *result); - return OR_CONTINUE; - } - //0ADB=2,%2d% = car_model %1o% name OpcodeResult __stdcall opcode_0ADB(CRunningScript *thread) { @@ -2989,7 +2556,7 @@ namespace CLEO if (argCount < 1) { SHOW_ERROR("Opcode [2002] missing condition result argument in script %s\nScript suspended.", ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); + return thread->Suspend(); } DWORD result; *thread >> result; @@ -3006,7 +2573,7 @@ namespace CLEO if (argCount != 0) // argument(s) not supported yet { SHOW_ERROR("Too many arguments of opcode [2003] in script %s\nScript suspended.", ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); + return thread->Suspend(); } SetScriptCondResult(thread, false); @@ -3021,7 +2588,7 @@ namespace CLEO if ((size_t)mem <= 
CCustomOpcodeSystem::MinValidAddress) { SHOW_ERROR("[2004] used with invalid '0x%X' pointer argument in script %s\nScript suspended.", mem, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return CCustomOpcodeSystem::ErrorSuspendScript(thread); + return thread->Suspend(); } // allocated with 0AC8 @@ -3098,6 +2665,23 @@ extern "C" return result; } + void WINAPI CLEO_ReadStringParamWriteBuffer(CLEO::CRunningScript* thread, char** outBuf, int* outBufSize, DWORD* outNeedsTerminator) + { + if (thread == nullptr || + outBuf == nullptr || + outBufSize == nullptr || + outNeedsTerminator == nullptr) + { + LOG_WARNING(thread, "Invalid argument of CLEO_ReadStringParamWriteBuffer in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); + return; + } + + auto target = GetStringParamWriteBuffer(thread); + *outBuf = target.data; + *outBufSize = target.size; + *outNeedsTerminator = target.needTerminator; + } + void WINAPI CLEO_WriteStringOpcodeParam(CLEO::CRunningScript* thread, const char* str) { if(!WriteStringParam(thread, str)) diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index f8e940c2..7885d4a9 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -8,8 +8,6 @@ namespace CLEO { typedef OpcodeResult(__stdcall * CustomOpcodeHandler)(CRunningScript*); - bool is_legacy_handle(DWORD dwHandle); - FILE * convert_handle_to_file(DWORD dwHandle); extern const char* (__cdecl* GetUserDirectory)(); extern void(__cdecl* ChangeToUserDir)(); @@ -45,18 +43,14 @@ namespace CLEO static OpcodeResult CallFunctionGeneric(WORD opcode, CRunningScript* thread, bool thisCall, bool returnArg); static OpcodeResult CleoReturnGeneric(WORD opcode, CRunningScript* thread, bool returnArgs = false, DWORD returnArgCount = 0, bool strictArgCount = true); - static OpcodeResult ErrorSuspendScript(CRunningScript* thread); // suspend script execution forever private: - friend OpcodeResult __stdcall opcode_0A9A(CRunningScript *pScript); - friend 
OpcodeResult __stdcall opcode_0A9B(CRunningScript *pScript); friend OpcodeResult __stdcall opcode_0AA2(CRunningScript *pScript); friend OpcodeResult __stdcall opcode_0AA3(CRunningScript *pScript); friend OpcodeResult __stdcall opcode_0AC8(CRunningScript *pScript); friend OpcodeResult __stdcall opcode_0AC9(CRunningScript *pScript); friend OpcodeResult __stdcall opcode_2004(CRunningScript* pScript); - std::set m_hFiles; std::set m_hNativeLibs; std::set m_pAllocations; diff --git a/source/CGameVersionManager.cpp b/source/CGameVersionManager.cpp index 1c48e62f..663f01a7 100644 --- a/source/CGameVersionManager.cpp +++ b/source/CGameVersionManager.cpp @@ -9,21 +9,6 @@ namespace CLEO { 0x0053E981, memory_und, 0x0053E981, 0x0053EE21, 0x00551174 }, // MA_CALL_UPDATE_GAME_LOGICS, { 0x0053BEE0, memory_und, 0x0053BEE0, 0x0053C380, 0x0054DE60 }, // MA_UPDATE_GAME_LOGICS_FUNCTION, - // GV_US10, GV_US11, GV_EU10, GV_EU11, GV_STEAM - { 0x008232D8, memory_und, 0x00823318, 0x00824098, 0x0085C75E }, // MA_FOPEN_FUNCTION, - { 0x0082318B, memory_und, 0x008231CB, 0x00823F4B, 0x0085C396 }, // MA_FCLOSE_FUNCTION, - { 0x008231DC, memory_und, 0x0082321C, 0x00823F9C, 0x0085C680 }, // MA_FGETC_FUNCTION, - { 0x00823798, memory_und, 0x008237D8, 0x00824558, 0x0085D00C }, // MA_FGETS_FUNCTION, - { 0x008262B8, memory_und, 0x008262F8, 0x00826BA8, 0x008621F1 }, // MA_FPUTS_FUNCTION, - { 0x00823521, memory_und, 0x00823561, 0x008242E1, 0x0085CD04 }, // MA_FREAD_FUNCTION, - { 0x00823674, memory_und, 0x008236B4, 0x00824434, 0x0085CE7E }, // MA_FWRITE_FUNCTION, - { 0x0082374F, memory_und, 0x0082378F, 0x0082450F, 0x0085CF87 }, // MA_FSEEK_FUNCTION, - { 0x00823A30, memory_und, 0x00823A70, 0x008247F0, 0x0085D464 }, // MA_FPRINTF_FUNCTION, - { 0x00826261, memory_und, 0x008262A1, 0x00826B51, 0x00862183 }, // MA_FTELL_FUNCTION, - { 0x00823E86, memory_und, 0x00823EC6, 0x00824C46, 0x0085DDDD }, // MA_FFLUSH_FUNCTION, - { 0x008262A2, memory_und, 0x008262E2, 0x00826B92, 0x0085D193 }, // MA_FEOF_FUNCTION, - { 
0x008262AD, memory_und, 0x008262ED, 0x00826B9D, 0x0085D1C2 }, // MA_FERROR_FUNCTION, - // GV_US10, GV_US11, GV_EU10, GV_EU11, GV_STEAM { 0x00BA6748, memory_und, 0x00BA6748, 0x00BA8DC8, 0x00C33100 }, // MA_MENU_MANAGER, { 0x0071A700, memory_und, 0x0071A700, 0x0071AF30, 0x0073BF50 }, // MA_DRAW_TEXT_FUNCTION, diff --git a/source/CGameVersionManager.h b/source/CGameVersionManager.h index ed73ed6e..aa0a82fa 100644 --- a/source/CGameVersionManager.h +++ b/source/CGameVersionManager.h @@ -27,21 +27,6 @@ namespace CLEO MA_CALL_UPDATE_GAME_LOGICS, MA_UPDATE_GAME_LOGICS_FUNCTION, - // CrtFix - MA_FOPEN_FUNCTION, - MA_FCLOSE_FUNCTION, - MA_FGETC_FUNCTION, - MA_FGETS_FUNCTION, - MA_FPUTS_FUNCTION, - MA_FREAD_FUNCTION, - MA_FWRITE_FUNCTION, - MA_FSEEK_FUNCTION, - MA_FPRINTF_FUNCTION, - MA_FTELL_FUNCTION, - MA_FFLUSH_FUNCTION, - MA_FEOF_FUNCTION, - MA_FERROR_FUNCTION, - // MenuStatusNotifier MA_MENU_MANAGER, MA_DRAW_TEXT_FUNCTION, diff --git a/source/cleo.def b/source/cleo.def index c9dffce9..3174d3ab 100644 --- a/source/cleo.def +++ b/source/cleo.def @@ -37,3 +37,4 @@ EXPORTS _CLEO_GetScriptDebugMode@4 @34 _CLEO_SetScriptDebugMode@8 @35 _CLEO_Log@8 @36 + _CLEO_ReadStringParamWriteBuffer@16 @37 diff --git a/tests/test_file_read_write.txt b/tests/test_file_read_write.txt new file mode 100644 index 00000000..65c77cf0 --- /dev/null +++ b/tests/test_file_read_write.txt @@ -0,0 +1,205 @@ +{$CLEO .cs} +{$USE file} +{$USE debug} + +debug_on +wait 3000 + +var 5@ : Integer +var 6@ : Integer + +copy_file "cleo\.cleo.log" {to} "cleo\.cleo_test.log" +while true + if + // test 0A9A + 0@ = open_file "cleo\.cleo_test.log" {mode} "r+" // read and write + then + print_formatted_now "0A9A File opened" time 1000 + wait 1000 + + // test 0A9C + 1@ = get_file_size 0@ + print_formatted_now "0A9C File size: %d" time 2000 1@ + wait 2000 + + // test 0A9D + 5@ = 0xCCCCCCCC + 0A9D: readfile 0@ size 2 to 5@ + print_formatted_now "0A9D Read WORD %x" time 2000 5@ + wait 2000 + + // test 0A9E + 5@ = 
0xAABBCCDD + 0A9E: write_file 0@ size 2 from 5@ + 0AD5: file 0@ seek -2 from_origin SeekOrigin.Current //IF and SET + 5@ = 0 + 0A9D: readfile 0@ size 2 to 5@ + if + 5@ == 0xCCDD + then + print_formatted_now "0A9E ok" time 1000 + wait 1000 + else + print_formatted_now "~r~0A9E failed~n~read: 0x%X, expected: 0xCCDD" time 5000 5@ + wait 5000 + end + + // test 0AD5 + 0A9D: readfile 0@ size 4 to 5@ + if + 0AD5: file 0@ seek -2 from_origin SeekOrigin.Current //IF and SET + then + 0A9D: readfile 0@ size 4 to 6@ + if + 5@ <> 6@ + then + print_formatted_now "0AD5 ok" time 1000 + wait 1000 + else + print_formatted_now "~r~0AD5 invalid result" time 5000 + wait 5000 + end + else + print_formatted_now "~r~0AD5 seek back failed" time 5000 + wait 5000 + end + + // test 0AD6 + if + not is_end_of_file_reached 0@ + then + print_formatted_now "0AD6: not EOF yet" time 1000 + wait 1000 + else + print_formatted_now "~r~0AD6: EOF reached" time 5000 + wait 5000 + end + + // test 0AD7 + 0AD5: file 0@ seek 30 from_origin SeekOrigin.Current + if + 0AD7: read_string_from_file 0@ to 1@v size 15 + then + 0ACE: print_help_formatted "0AD7 read string" + print_formatted_now "Read: %s" time 2000 1@v + wait 2000 + else + print_formatted_now "~r~0AD7 failed" time 5000 + wait 5000 + end + + // test 0AD8 + if + 0AD8: write_string_to_file 0@ {text} "test text" + then + 0AD5: file 0@ seek -9 from_origin SeekOrigin.Current + 0AD7: read_string_from_file 0@ to 1@v size 10 + + if + 1@v == "test text" + then + print_formatted_now "0AD8 ok" time 1000 + wait 1000 + else + print_formatted_now "~r~0AD8 invalid result~n~%s" time 5000 1@v + wait 5000 + end + else + print_formatted_now "~r~0AD8 failed to write" time 5000 + wait 5000 + end + + // test 0AD9 + 0AD9: write_formatted_string_to_file 0@ {format} "%x%X%s" {args} 0xA 0xB "CD" + 0AD5: file 0@ seek -4 from_origin SeekOrigin.Current + 0AD7: read_string_from_file 0@ to 1@v size 5 + if + 1@v == "aBCD" + then + print_formatted_now "0AD9 ok" time 1000 + wait 1000 + 
else + print_formatted_now "~r~0AD9 invalid result~n~%s" time 5000 1@v + wait 5000 + end + + // test 0ADA + 0AD8: write_string_to_file 0@ {text} "5:17 3.1415 END" + 0AD5: file 0@ seek -15 from_origin SeekOrigin.Current + if + 0ADA: scan_file 0@ {format} "%d:%d %f" {nValues} 5@ {values} 6@ 7@ 8@ + then + if and + 5@ == 3 + 6@ == 5 + 7@ == 17 + 8@ == 3.1415 + then + 0AD7: read_string_from_file 0@ to 1@v size 5 + if + 1@v == " END" + then + print_formatted_now "0ADA ok" time 1000 + wait 1000 + else + print_formatted_now "~r~0ADA post check fail~n~%s" time 5000 1@v + wait 5000 + end + else + print_formatted_now "~r~0ADA invalid result~n~%d %d %d %f" time 5000 5@ 6@ 7@ 8@ + wait 5000 + end + else + print_formatted_now "~r~0ADA failed. Read args: %d" time 5000 5@ + wait 5000 + end + + // test 2300 + 2300: get_file_position 0@ {store_to} 5@ + 0AD8: write_string_to_file 0@ {text} "abc" + 2300: get_file_position 0@ {store_to} 6@ + 6@ -= 5@ + if + 6@ == 3 + then + print_formatted_now "2300 ok" time 1000 + wait 1000 + else + print_formatted_now "~r~2300 failed. 
Difference: %d" time 1000 6@ + wait 1000 + end + + // test 2301 + 0AD8: write_string_to_file 0@ {text} "test text" + 0AD5: file 0@ seek -9 from_origin SeekOrigin.Current + 1@ = 0 + 2@ = 0 + 3@ = 0 + 4@ = 0 + 5@ = get_var_pointer 1@ + if + 2301: read_block_from_file 0@ {size} 9 {buffer} 5@ + then + if + 1@v == "test text" + then + print_formatted_now "2301 ok" time 1000 + wait 1000 + else + print_formatted_now "~r~2301 invalid result~n~%s" time 5000 1@v + wait 5000 + end + else + print_formatted_now "~r~2301 failed to read" time 5000 + wait 5000 + end + + // test 0A9B + 0A9B: close_file 0@ + else + print_formatted_now "Failed to open the file" time 5000 + end + + print_formatted_now "Finished testing file read write opcodes" time 5000 + wait 5000 +end From b83fa35a4e08581bcc766c5a9372763dc75ffe26 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Mon, 19 Feb 2024 13:49:57 +0100 Subject: [PATCH 085/216] MemoryOperations plugin (#54) * Created MemoryOperations plugin. Opcode parameters validation utilities. Unit tests. * Merge fix. * Updates. * Fixes, more tests. * Plugins loading updated. * Fixed support of floats in get/set cleo variable opcodes. * Updates. * Fix * Updates. * 0A9C test * 0A9D test * Review updates. 
* Opcode get_script_struct_from_filename * Opcode 2404 updated * get_script_struct_just_created name updated * Test case updated --- CHANGELOG.md | 41 +- CLEO5.vcxproj | 8 +- CLEO5.vcxproj.filters | 6 + cleo_plugins/CLEO_Plugins.sln | 6 + cleo_plugins/DebugUtils/DebugUtils.cpp | 63 +- cleo_plugins/DebugUtils/DebugUtils.vcxproj | 11 +- .../DebugUtils/DebugUtils.vcxproj.filters | 9 +- cleo_plugins/DebugUtils/ScreenLog.cpp | 2 +- cleo_plugins/DebugUtils/Utils.h | 16 - .../FileSystemOperations.cpp | 586 +++++++---- .../FileSystemOperations.vcxproj | 8 +- cleo_plugins/IniFiles/IniFiles.cpp | 2 +- cleo_plugins/IniFiles/IniFiles.vcxproj | 8 +- cleo_plugins/IntOperations/IntOperations.cpp | 160 +-- .../IntOperations/IntOperations.vcxproj | 8 +- .../MemoryOperations/MemoryOperations.cpp | 789 ++++++++++++++ .../MemoryOperations/MemoryOperations.vcxproj | 144 +++ .../MemoryOperations.vcxproj.filters | 43 + cleo_sdk/CLEO.h | 67 +- cleo_sdk/CLEO_Utils.h | 471 +++++++++ source/CCustomOpcodeSystem.cpp | 987 +++++------------- source/CCustomOpcodeSystem.h | 10 +- source/CDebug.h | 4 - source/CPluginSystem.h | 73 +- source/CScriptEngine.cpp | 124 ++- source/CScriptEngine.h | 15 +- source/cleo.def | 9 + source/stdafx.h | 9 +- tests/FilesystemOperations/0A99.s | Bin 0 -> 952 bytes tests/FilesystemOperations/0A99.txt | 109 ++ tests/FilesystemOperations/0A9A.s | Bin 0 -> 359 bytes tests/FilesystemOperations/0A9A.txt | 48 + tests/FilesystemOperations/0A9B.s | Bin 0 -> 458 bytes tests/FilesystemOperations/0A9B.txt | 57 + tests/FilesystemOperations/0A9C.s | Bin 0 -> 390 bytes tests/FilesystemOperations/0A9C.txt | 49 + tests/FilesystemOperations/0A9D.s | Bin 0 -> 1246 bytes tests/FilesystemOperations/0A9D.txt | 114 ++ tests/MemoryOperations/0A8C.s | Bin 0 -> 1910 bytes tests/MemoryOperations/0A8C.txt | 161 +++ tests/MemoryOperations/0A8D.s | Bin 0 -> 1386 bytes tests/MemoryOperations/0A8D.txt | 129 +++ tests/MemoryOperations/0A96.s | Bin 0 -> 205 bytes tests/MemoryOperations/0A96.txt | 34 
+ tests/MemoryOperations/0A97.s | Bin 0 -> 240 bytes tests/MemoryOperations/0A97.txt | 41 + tests/MemoryOperations/0A98.s | Bin 0 -> 242 bytes tests/MemoryOperations/0A98.txt | 41 + tests/MemoryOperations/0AC6.s | Bin 0 -> 291 bytes tests/MemoryOperations/0AC6.txt | 45 + tests/MemoryOperations/0AC7.s | Bin 0 -> 257 bytes tests/MemoryOperations/0AC7.txt | 39 + tests/MemoryOperations/0AC8.s | Bin 0 -> 421 bytes tests/MemoryOperations/0AC8.txt | 50 + tests/MemoryOperations/0AC9.s | Bin 0 -> 225 bytes tests/MemoryOperations/0AC9.txt | 32 + tests/MemoryOperations/0AE9.txt | 38 + tests/MemoryOperations/0AEA.s | Bin 0 -> 222 bytes tests/MemoryOperations/0AEA.txt | 37 + tests/MemoryOperations/0AEB.s | Bin 0 -> 255 bytes tests/MemoryOperations/0AEB.txt | 41 + tests/MemoryOperations/0AEC.txt | 41 + tests/MemoryOperations/2400.s | Bin 0 -> 2443 bytes tests/MemoryOperations/2400.txt | 189 ++++ tests/MemoryOperations/2401.s | Bin 0 -> 1965 bytes tests/MemoryOperations/2401.txt | 161 +++ tests/MemoryOperations/2402.s | Bin 0 -> 1968 bytes tests/MemoryOperations/2402.txt | 156 +++ tests/MemoryOperations/2403.s | Bin 0 -> 108 bytes tests/MemoryOperations/2403.txt | 32 + tests/MemoryOperations/2404.s | Bin 0 -> 255 bytes tests/MemoryOperations/2404.txt | 35 + tests/cleo_tests_runner.cs | Bin 0 -> 581 bytes tests/cleo_tests_runner.txt | 96 ++ 74 files changed, 4239 insertions(+), 1215 deletions(-) delete mode 100644 cleo_plugins/DebugUtils/Utils.h create mode 100644 cleo_plugins/MemoryOperations/MemoryOperations.cpp create mode 100644 cleo_plugins/MemoryOperations/MemoryOperations.vcxproj create mode 100644 cleo_plugins/MemoryOperations/MemoryOperations.vcxproj.filters create mode 100644 cleo_sdk/CLEO_Utils.h create mode 100644 tests/FilesystemOperations/0A99.s create mode 100644 tests/FilesystemOperations/0A99.txt create mode 100644 tests/FilesystemOperations/0A9A.s create mode 100644 tests/FilesystemOperations/0A9A.txt create mode 100644 tests/FilesystemOperations/0A9B.s create 
mode 100644 tests/FilesystemOperations/0A9B.txt create mode 100644 tests/FilesystemOperations/0A9C.s create mode 100644 tests/FilesystemOperations/0A9C.txt create mode 100644 tests/FilesystemOperations/0A9D.s create mode 100644 tests/FilesystemOperations/0A9D.txt create mode 100644 tests/MemoryOperations/0A8C.s create mode 100644 tests/MemoryOperations/0A8C.txt create mode 100644 tests/MemoryOperations/0A8D.s create mode 100644 tests/MemoryOperations/0A8D.txt create mode 100644 tests/MemoryOperations/0A96.s create mode 100644 tests/MemoryOperations/0A96.txt create mode 100644 tests/MemoryOperations/0A97.s create mode 100644 tests/MemoryOperations/0A97.txt create mode 100644 tests/MemoryOperations/0A98.s create mode 100644 tests/MemoryOperations/0A98.txt create mode 100644 tests/MemoryOperations/0AC6.s create mode 100644 tests/MemoryOperations/0AC6.txt create mode 100644 tests/MemoryOperations/0AC7.s create mode 100644 tests/MemoryOperations/0AC7.txt create mode 100644 tests/MemoryOperations/0AC8.s create mode 100644 tests/MemoryOperations/0AC8.txt create mode 100644 tests/MemoryOperations/0AC9.s create mode 100644 tests/MemoryOperations/0AC9.txt create mode 100644 tests/MemoryOperations/0AE9.txt create mode 100644 tests/MemoryOperations/0AEA.s create mode 100644 tests/MemoryOperations/0AEA.txt create mode 100644 tests/MemoryOperations/0AEB.s create mode 100644 tests/MemoryOperations/0AEB.txt create mode 100644 tests/MemoryOperations/0AEC.txt create mode 100644 tests/MemoryOperations/2400.s create mode 100644 tests/MemoryOperations/2400.txt create mode 100644 tests/MemoryOperations/2401.s create mode 100644 tests/MemoryOperations/2401.txt create mode 100644 tests/MemoryOperations/2402.s create mode 100644 tests/MemoryOperations/2402.txt create mode 100644 tests/MemoryOperations/2403.s create mode 100644 tests/MemoryOperations/2403.txt create mode 100644 tests/MemoryOperations/2404.s create mode 100644 tests/MemoryOperations/2404.txt create mode 100644 
tests/cleo_tests_runner.cs create mode 100644 tests/cleo_tests_runner.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index 282a641e..73acc8ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,20 +8,32 @@ - new opcode **2101 ([trace](https://library.sannybuilder.com/#/sa/debug/2101))** - new opcode **2102 ([log_to_file](https://library.sannybuilder.com/#/sa/debug/2102))** - implemented support of opcodes **0662**, **0663** and **0664** (original Rockstar's script debugging opcodes. See DebugUtils.ini) +- new [FileSystemOperations](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/FileSystemOperations) plugin + - file related opcodes moved from CLEO into separated plugin + - opcode **0A9E ([write_to_file](https://library.sannybuilder.com/#/sa/file/0A9E))** now supports literal numbers and strings + - fixed bug preventing file stream opcodes from working correctly for read-write modes + - fixed buffer overflows in file stream read opcodes + - added/fixed support of all file stream opcodes in legacy mode (Cleo3) + - new opcode **2300 ([get_file_position](https://library.sannybuilder.com/#/sa/file/2300))** + - new opcode **2301 ([read_block_from_file](https://library.sannybuilder.com/#/sa/file/2301))** + - **2302 ([resolve_filepath](https://library.sannybuilder.com/#/sa/file/2302))** + - **2303 ([get_script_filename](https://library.sannybuilder.com/#/sa/file/2303))** +- new [MemoryOperations](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/MemoryOperations) plugin + - memory related opcodes moved from CLEO into separated plugin + - validation of input and output parameters for all opcodes + - opcode **0A8C ([write_memory](https://library.sannybuilder.com/#/sa/memory/0A8C))** now supports strings + - new opcode **2400 ([copy_memory](https://library.sannybuilder.com/#/sa/memory/2400))** + - new opcode **2401 ([read_memory_with_offset](https://library.sannybuilder.com/#/sa/memory/2401))** + - new opcode **2402 
([writememory_with_offset](https://library.sannybuilder.com/#/sa/memory/2402))** + - new opcode **2403 ([forget_memory](https://library.sannybuilder.com/#/sa/memory/2403))** + - new opcode **2404 ([get_script_struct_just_created](https://library.sannybuilder.com/#/sa/memory/2404))** + - new opcode **2405 ([is_script_running](https://library.sannybuilder.com/#/sa/memory/2405))** + - new opcode **2406 ([get_script_struct_from_filename](https://library.sannybuilder.com/#/sa/memory/2406))** - new and updated opcodes - **0B1E ([sign_extend](https://library.sannybuilder.com/#/sa/bitwise/0B1E))** - **0DD5 ([get_game_platform](https://library.sannybuilder.com/#/sa/CLEO/0DD5))** - - **2000 ([resolve_filepath](https://library.sannybuilder.com/#/sa/CLEO/2000))** - - **2001 ([get_script_filename](https://library.sannybuilder.com/#/sa/CLEO/2001))** - **2002 ([cleo_return_with](https://library.sannybuilder.com/#/sa/CLEO/2002))** - **2003 ([cleo_return_fail](https://library.sannybuilder.com/#/sa/CLEO/2003))** - - **2004 ([forget_memory](https://library.sannybuilder.com/#/sa/CLEO/2004))** - - **2300 ([get_file_position](https://library.sannybuilder.com/#/sa/file/2300))** - - **2301 ([read_block_from_file](https://library.sannybuilder.com/#/sa/file/2301))** - - opcodes **0A9A**, **0A9B**, **0A9C**, **0A9D**, **0A9E**, **0AAB**, **0AD5**, **0AD6**, **0AD7**, **0AD8**, **0AD9**, **0ADA**, **0AE4**, **0AE5**, **0AE6**, **0AE7** and **0AE8** moved to the [FileSystemOperations](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/FileSystemOperations) plugin - - fixed bug preventing file stream opcodes from working correctly for read-write modes - - fixed buffer overflows in file stream read opcodes - - added/fixed support of all file stream opcodes in legacy mode (Cleo3) - 'argument count' parameter of **0AB1 (cleo_call)** is now optional. `cleo_call @LABEL args 0` can be written as `cleo_call @LABEL` - 'argument count' parameter of **0AB2 (cleo_return)** is now optional. 
`cleo_return 0` can be written as `cleo_return` - **cleo_return_\*** opcodes now can pass strings as return arguments @@ -46,22 +58,33 @@ - fixed resolution dependent aspect ratio of CLEO text in main menu - fixed clearing mission locals when new CLEO mission is started - when reading less than 4 bytes with **0A9D (readfile)** now remaining bytes of the target variable are set to zero +- fixed invalid 7 characters length limit of **0AAA (get_script_struct_named)** #### SDK AND PLUGINS - now all opcodes in range **0-7FFF** can be registered by plugins - plugins moved to _cleo\cleo_plugins_ directory - new SDK method: CLEO_RegisterCallback - new SDK method: CLEO_GetVarArgCount +- new SDK method: CLEO_PeekIntOpcodeParam +- new SDK method: CLEO_PeekFloatOpcodeParam - new SDK method: CLEO_SkipUnusedVarArgs - new SDK method: CLEO_ReadParamsFormatted - new SDK method: CLEO_ReadStringParamWriteBuffer +- new SDK method: CLEO_GetOpcodeParamsArray +- new SDK method: CLEO_GetParamsHandledCount - new SDK method: CLEO_GetScriptVersion - new SDK method: CLEO_GetScriptInfoStr +- new SDK method: CLEO_GetScriptFilename +- new SDK method: CLEO_GetScriptWorkDir +- new SDK method: CLEO_SetScriptWorkDir - new SDK method: CLEO_ResolvePath +- new SDK method: CLEO_GetScriptByName +- new SDK method: CLEO_GetScriptByFilename - new SDK method: CLEO_GetScriptDebugMode - new SDK method: CLEO_SetScriptDebugMode - new SDK method: CLEO_Log + #### CLEO internal - project migrated to VS 2022 - configured game debugging settings diff --git a/CLEO5.vcxproj b/CLEO5.vcxproj index 850780ea..6c41675f 100644 --- a/CLEO5.vcxproj +++ b/CLEO5.vcxproj @@ -58,9 +58,13 @@ Create Create + + NotUsing + + @@ -150,7 +154,7 @@ true MultiThreaded $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared\;$(PLUGIN_SDK_DIR)\shared\game\;$(SolutionDir)\third-party\bass;%(AdditionalIncludeDirectories) - 
_NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;%(PreprocessorDefinitions) + _NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;%(PreprocessorDefinitions);TARGET_NAME=R"($(TargetName))" /Zc:threadSafeInit- %(AdditionalOptions) Create stdcpp17 @@ -182,7 +186,7 @@ xcopy /Y "$(OutDir)$(TargetName).pdb" "$(GTA_SA_DIR)\" true MultiThreadedDebug $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(SolutionDir)\third-party\bass;%(AdditionalIncludeDirectories) - _DEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;_SCL_SECURE_NO_WARNINGS;GTASA;%(PreprocessorDefinitions); + _DEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;_SCL_SECURE_NO_WARNINGS;GTASA;%(PreprocessorDefinitions);;TARGET_NAME=R"($(TargetName))" /Zc:threadSafeInit- %(AdditionalOptions) Create stdcpp17 diff --git a/CLEO5.vcxproj.filters b/CLEO5.vcxproj.filters index 79d21bd5..8b8127ad 100644 --- a/CLEO5.vcxproj.filters +++ b/CLEO5.vcxproj.filters @@ -102,6 +102,9 @@ source\extensions + + plugin_sdk + @@ -167,6 +170,9 @@ source\extensions + + cleo_sdk + diff --git a/cleo_plugins/CLEO_Plugins.sln b/cleo_plugins/CLEO_Plugins.sln index 1794ece0..2960e5f4 100644 --- a/cleo_plugins/CLEO_Plugins.sln +++ b/cleo_plugins/CLEO_Plugins.sln @@ -11,6 +11,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "IntOperations", "IntOperati EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DebugUtils", "DebugUtils\DebugUtils.vcxproj", "{481896C4-0C19-4992-9602-729537774B32}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MemoryOperations", "MemoryOperations\MemoryOperations.vcxproj", "{35C80F79-8B18-4925-8C32-94B320DBE76F}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|x86 = Debug|x86 @@ -33,6 +35,10 @@ Global {481896C4-0C19-4992-9602-729537774B32}.Debug|x86.Build.0 = Debug|Win32 
{481896C4-0C19-4992-9602-729537774B32}.Release|x86.ActiveCfg = Release|Win32 {481896C4-0C19-4992-9602-729537774B32}.Release|x86.Build.0 = Release|Win32 + {35C80F79-8B18-4925-8C32-94B320DBE76F}.Debug|x86.ActiveCfg = Debug|Win32 + {35C80F79-8B18-4925-8C32-94B320DBE76F}.Debug|x86.Build.0 = Debug|Win32 + {35C80F79-8B18-4925-8C32-94B320DBE76F}.Release|x86.ActiveCfg = Release|Win32 + {35C80F79-8B18-4925-8C32-94B320DBE76F}.Release|x86.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/cleo_plugins/DebugUtils/DebugUtils.cpp b/cleo_plugins/DebugUtils/DebugUtils.cpp index db83becf..d0c1d36b 100644 --- a/cleo_plugins/DebugUtils/DebugUtils.cpp +++ b/cleo_plugins/DebugUtils/DebugUtils.cpp @@ -1,6 +1,6 @@ #include "ScreenLog.h" -#include "Utils.h" #include "CLEO.h" +#include "CLEO_Utils.h" #include "CTimer.h" #include // keyboard #include @@ -33,37 +33,35 @@ class DebugUtils DebugUtils() { auto cleoVer = CLEO_GetVersion(); - if (cleoVer >= CLEO_VERSION) + if (cleoVer < CLEO_VERSION) { - auto config = GetConfigFilename(); + auto err = StringPrintf("This plugin requires version %X or later! 
\nCurrent version of CLEO is %X.", CLEO_VERSION >> 8, cleoVer >> 8); + MessageBox(HWND_DESKTOP, err.c_str(), TARGET_NAME, MB_SYSTEMMODAL | MB_ICONERROR); + return; + } - // register opcodes - CLEO_RegisterOpcode(0x00C3, Opcode_DebugOn); - CLEO_RegisterOpcode(0x00C4, Opcode_DebugOff); - CLEO_RegisterOpcode(0x2100, Opcode_Breakpoint); - CLEO_RegisterOpcode(0x2101, Opcode_Trace); - CLEO_RegisterOpcode(0x2102, Opcode_LogToFile); + auto config = GetConfigFilename(); - // original Rockstar's script debugging opcodes - if(GetPrivateProfileInt("General", "LegacyDebugOpcodes", 0, config.c_str()) != 0) - { - CLEO_RegisterOpcode(0x0662, Opcode_PrintString); - CLEO_RegisterOpcode(0x0663, Opcode_PrintInt); - CLEO_RegisterOpcode(0x0664, Opcode_PrintFloat); - } + // register opcodes + CLEO_RegisterOpcode(0x00C3, Opcode_DebugOn); + CLEO_RegisterOpcode(0x00C4, Opcode_DebugOff); + CLEO_RegisterOpcode(0x2100, Opcode_Breakpoint); + CLEO_RegisterOpcode(0x2101, Opcode_Trace); + CLEO_RegisterOpcode(0x2102, Opcode_LogToFile); - // register event callbacks - CLEO_RegisterCallback(eCallbackId::Log, OnLog); - CLEO_RegisterCallback(eCallbackId::DrawingFinished, OnDrawingFinished); - CLEO_RegisterCallback(eCallbackId::ScriptProcess, OnScriptProcess); - CLEO_RegisterCallback(eCallbackId::ScriptsFinalize, OnScriptsFinalize); - } - else + // original Rockstar's script debugging opcodes + if(GetPrivateProfileInt("General", "LegacyDebugOpcodes", 0, config.c_str()) != 0) { - std::string err(128, '\0'); - sprintf(err.data(), "This plugin requires version %X or later! 
\nCurrent version of CLEO is %X.", CLEO_VERSION >> 8, cleoVer >> 8); - MessageBox(HWND_DESKTOP, err.data(), "DebugUtils.cleo", MB_SYSTEMMODAL | MB_ICONERROR); + CLEO_RegisterOpcode(0x0662, Opcode_PrintString); + CLEO_RegisterOpcode(0x0663, Opcode_PrintInt); + CLEO_RegisterOpcode(0x0664, Opcode_PrintFloat); } + + // register event callbacks + CLEO_RegisterCallback(eCallbackId::Log, OnLog); + CLEO_RegisterCallback(eCallbackId::DrawingFinished, OnDrawingFinished); + CLEO_RegisterCallback(eCallbackId::ScriptProcess, OnScriptProcess); + CLEO_RegisterCallback(eCallbackId::ScriptsFinalize, OnScriptsFinalize); } // ---------------------------------------------- event callbacks ------------------------------------------------- @@ -129,11 +127,14 @@ class DebugUtils { keysReleased = false; - std::stringstream ss; - ss << "Script breakpoint "; - if (!pausedScripts[i].msg.empty()) ss << "'" << pausedScripts[i].msg << "' "; - ss << "released in '" << pausedScripts[i].ptr->GetName() << "'"; - CLEO_Log(eLogLevel::Debug, ss.str().c_str()); + if (!CTimer::m_CodePause) + { + std::stringstream ss; + ss << "Script breakpoint "; + if (!pausedScripts[i].msg.empty()) ss << "'" << pausedScripts[i].msg << "' "; // TODO: restore color if custom was used in name + ss << "released in '" << pausedScripts[i].ptr->GetName() << "'"; + CLEO_Log(eLogLevel::Debug, ss.str().c_str()); + } if (CTimer::m_CodePause) { diff --git a/cleo_plugins/DebugUtils/DebugUtils.vcxproj b/cleo_plugins/DebugUtils/DebugUtils.vcxproj index 119798e5..838cf9e1 100644 --- a/cleo_plugins/DebugUtils/DebugUtils.vcxproj +++ b/cleo_plugins/DebugUtils/DebugUtils.vcxproj @@ -45,13 +45,13 @@ $(SolutionDir).output\ $(ProjectDir).obj\$(Configuration)\ DebugUtils - .cleo + .cleo5 $(SolutionDir).output\ $(ProjectDir).obj\$(Configuration)\ DebugUtils - .cleo + .cleo5 $(GTA_SA_DIR)\gta_sa.exe @@ -68,7 +68,7 @@ true MultiThreaded 
$(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared;$(PLUGIN_SDK_DIR)\shared\game;$(SolutionDir)..\cleo_sdk - _NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions) + _NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions);TARGET_NAME=R"($(TargetName))" /Zc:threadSafeInit- %(AdditionalOptions) stdcpp17 @@ -97,7 +97,7 @@ xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" true MultiThreadedDebug $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared;$(PLUGIN_SDK_DIR)\shared\game;$(SolutionDir)..\cleo_sdk - _DEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions) + _DEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions);TARGET_NAME=R"($(TargetName))" /Zc:threadSafeInit- %(AdditionalOptions) stdcpp17 @@ -127,8 +127,9 @@ xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" + + - diff --git a/cleo_plugins/DebugUtils/DebugUtils.vcxproj.filters b/cleo_plugins/DebugUtils/DebugUtils.vcxproj.filters index 46219848..533d3aa0 100644 --- a/cleo_plugins/DebugUtils/DebugUtils.vcxproj.filters +++ b/cleo_plugins/DebugUtils/DebugUtils.vcxproj.filters @@ -15,13 +15,18 @@ sdk - + sdk - + + sdk + + + sdk + diff --git a/cleo_plugins/DebugUtils/ScreenLog.cpp 
b/cleo_plugins/DebugUtils/ScreenLog.cpp index b13c09bf..6c9151e8 100644 --- a/cleo_plugins/DebugUtils/ScreenLog.cpp +++ b/cleo_plugins/DebugUtils/ScreenLog.cpp @@ -1,5 +1,5 @@ #include "ScreenLog.h" -#include "Utils.h" +#include "CLEO_Utils.h" #include "CFont.h" #include "CTimer.h" diff --git a/cleo_plugins/DebugUtils/Utils.h b/cleo_plugins/DebugUtils/Utils.h deleted file mode 100644 index 58c9ade0..00000000 --- a/cleo_plugins/DebugUtils/Utils.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once -#include "CLEO.h" -#include "CFileMgr.h" -#include - -// plugin's config file -static std::string GetConfigFilename() -{ - std::string configFile = CFileMgr::ms_rootDirName; - if (!configFile.empty() && configFile.back() != '\\') configFile.push_back('\\'); - - configFile += "cleo\\cleo_plugins\\DebugUtils.ini"; - - return configFile; -} - diff --git a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp index 62c43901..721eba3b 100644 --- a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp @@ -1,17 +1,17 @@ #include "plugin.h" #include "CLEO.h" +#include "CLEO_Utils.h" #include "FileUtils.h" -#include "Utils.h" #include using namespace CLEO; using namespace plugin; -#define READ_HANDLE_PARAM() CLEO_GetIntOpcodeParam(thread); \ +#define READ_FILE_HANDLE_PARAM() CLEO_GetIntOpcodeParam(thread); \ if((size_t)handle <= MinValidAddress) \ - { auto info = scriptInfoStr(thread); SHOW_ERROR("Invalid '0x%X' file handle param in script %s \nScript suspended.", handle, info.c_str()); return thread->Suspend(); } \ - else if(m_hFiles.find(handle) == m_hFiles.end()) { auto info = scriptInfoStr(thread); SHOW_ERROR("Invalid or already closed '0x%X' file handle param in script %s \nScript suspended.", handle, info.c_str()); return thread->Suspend(); } + { auto info = ScriptInfoStr(thread); SHOW_ERROR("Invalid '0x%X' file handle param in script %s \nScript 
suspended.", handle, info.c_str()); return thread->Suspend(); } \ + else if(m_hFiles.find(handle) == m_hFiles.end()) { auto info = ScriptInfoStr(thread); SHOW_ERROR("Invalid or already closed '0x%X' file handle param in script %s \nScript suspended.", handle, info.c_str()); return thread->Suspend(); } class FileSystemOperations { @@ -33,63 +33,89 @@ class FileSystemOperations FileSystemOperations() { auto cleoVer = CLEO_GetVersion(); - if (cleoVer >= CLEO_VERSION) - { - File::initialize(CLEO_GetGameVersion()); // file utils - - //register opcodes - CLEO_RegisterOpcode(0x0A9A, opcode_0A9A); - CLEO_RegisterOpcode(0x0A9B, opcode_0A9B); - CLEO_RegisterOpcode(0x0A9C, opcode_0A9C); - CLEO_RegisterOpcode(0x0A9D, opcode_0A9D); - CLEO_RegisterOpcode(0x0A9E, opcode_0A9E); - CLEO_RegisterOpcode(0x0AD5, opcode_0AD5); - CLEO_RegisterOpcode(0x0AD6, opcode_0AD6); - CLEO_RegisterOpcode(0x0AD7, opcode_0AD7); - CLEO_RegisterOpcode(0x0AD8, opcode_0AD8); - CLEO_RegisterOpcode(0x0AD9, opcode_0AD9); - CLEO_RegisterOpcode(0x0ADA, opcode_0ADA); - CLEO_RegisterOpcode(0x2300, opcode_2300); - CLEO_RegisterOpcode(0x2301, opcode_2301); - - CLEO_RegisterOpcode(0x0AAB, Script_FS_FileExists); - CLEO_RegisterOpcode(0x0AE4, Script_FS_DirectoryExists); - CLEO_RegisterOpcode(0x0AE5, Script_FS_CreateDirectory); - CLEO_RegisterOpcode(0x0AE6, Script_FS_FindFirstFile); - CLEO_RegisterOpcode(0x0AE7, Script_FS_FindNextFile); - CLEO_RegisterOpcode(0x0AE8, Script_FS_FindClose); - - CLEO_RegisterOpcode(0x0B00, Script_FS_DeleteFile); - CLEO_RegisterOpcode(0x0B01, Script_FS_DeleteDirectory); - CLEO_RegisterOpcode(0x0B02, Script_FS_MoveFile); - CLEO_RegisterOpcode(0x0B03, Script_FS_MoveDir); - CLEO_RegisterOpcode(0x0B04, Script_FS_CopyFile); - CLEO_RegisterOpcode(0x0B05, Script_FS_CopyDir); - - // register event callbacks - CLEO_RegisterCallback(eCallbackId::ScriptsFinalize, OnFinalizeScriptObjects); - } - else + if (cleoVer < CLEO_VERSION) { - std::string err(128, '\0'); - sprintf(err.data(), "This plugin 
requires version %X or later! \nCurrent version of CLEO is %X.", CLEO_VERSION >> 8, cleoVer >> 8); - MessageBox(HWND_DESKTOP, err.data(), "FileSystemOperations.cleo", MB_SYSTEMMODAL | MB_ICONERROR); + auto err = StringPrintf("This plugin requires version %X or later! \nCurrent version of CLEO is %X.", CLEO_VERSION >> 8, cleoVer >> 8); + MessageBox(HWND_DESKTOP, err.c_str(), TARGET_NAME, MB_SYSTEMMODAL | MB_ICONERROR); + return; } + + File::initialize(CLEO_GetGameVersion()); // file utils + + //register opcodes + CLEO_RegisterOpcode(0x0A99, opcode_0A99); // set_current_directory + CLEO_RegisterOpcode(0x0A9A, opcode_0A9A); // openfile + CLEO_RegisterOpcode(0x0A9B, opcode_0A9B); // closefile + CLEO_RegisterOpcode(0x0A9C, opcode_0A9C); // get_file_size + CLEO_RegisterOpcode(0x0A9D, opcode_0A9D); // read_from_file + CLEO_RegisterOpcode(0x0A9E, opcode_0A9E); // write_to_file + + CLEO_RegisterOpcode(0x0AAB, Script_FS_FileExists); + CLEO_RegisterOpcode(0x0AE4, Script_FS_DirectoryExists); + CLEO_RegisterOpcode(0x0AE5, Script_FS_CreateDirectory); + CLEO_RegisterOpcode(0x0AE6, Script_FS_FindFirstFile); + CLEO_RegisterOpcode(0x0AE7, Script_FS_FindNextFile); + CLEO_RegisterOpcode(0x0AE8, Script_FS_FindClose); + + CLEO_RegisterOpcode(0x0AD5, opcode_0AD5); // file_seek + CLEO_RegisterOpcode(0x0AD6, opcode_0AD6); // is_end_of_file_reached + CLEO_RegisterOpcode(0x0AD7, opcode_0AD7); // read_string_from_file + CLEO_RegisterOpcode(0x0AD8, opcode_0AD8); // write_string_to_file + CLEO_RegisterOpcode(0x0AD9, opcode_0AD9); // write_formatted_string_to_file + CLEO_RegisterOpcode(0x0ADA, opcode_0ADA); // scan_file + + CLEO_RegisterOpcode(0x0B00, Script_FS_DeleteFile); + CLEO_RegisterOpcode(0x0B01, Script_FS_DeleteDirectory); + CLEO_RegisterOpcode(0x0B02, Script_FS_MoveFile); + CLEO_RegisterOpcode(0x0B03, Script_FS_MoveDir); + CLEO_RegisterOpcode(0x0B04, Script_FS_CopyFile); + CLEO_RegisterOpcode(0x0B05, Script_FS_CopyDir); + + CLEO_RegisterOpcode(0x2300, opcode_2300); // get_file_position 
+ CLEO_RegisterOpcode(0x2301, opcode_2301); // read_block_from_file + CLEO_RegisterOpcode(0x2302, opcode_2302); // resolve_filepath + CLEO_RegisterOpcode(0x2303, opcode_2303); // get_script_filename + + // register event callbacks + CLEO_RegisterCallback(eCallbackId::ScriptsFinalize, OnFinalizeScriptObjects); } - static std::string ReadPathParam(CRunningScript* thread) + //0A99=1,set_current_directory %1b:userdir/rootdir% + static OpcodeResult __stdcall opcode_0A99(CRunningScript* thread) { - std::string path(MAX_STR_LEN, '\0'); - CLEO_ReadStringOpcodeParam(thread, path.data(), MAX_STR_LEN); - CLEO_ResolvePath(thread, path.data(), MAX_STR_LEN); - path.resize(strlen(path.c_str())); - return path; + const char* path; + + auto paramType = CLEO_GetOperandType(thread); + if (IsImmInteger(paramType) || IsVariable(paramType)) + { + // numbered predefined paths + auto idx = OPCODE_READ_PARAM_INT(); + switch (idx) + { + case 0: path = DIR_GAME; break; + case 1: path = DIR_USER; break; + case 2: path = DIR_SCRIPT; break; + default: + LOG_WARNING(0, "Value (%d) not known by opcode [0A99] in script %s", idx, ScriptInfoStr(thread).c_str()); + return OR_CONTINUE; + } + + CLEO_SetScriptWorkDir(thread, path); + return OR_CONTINUE; + } + else + { + path = OPCODE_READ_PARAM_STRING(); + } + + CLEO_SetScriptWorkDir(thread, path); + return OR_CONTINUE; } //0A9A=3,%3d% = openfile %1d% mode %2d% // IF and SET static OpcodeResult WINAPI opcode_0A9A(CRunningScript* thread) { - auto filename = ReadPathParam(thread); + auto filename = OPCODE_READ_PARAM_FILEPATH(); char mode[16]; auto paramType = CLEO_GetOperandType(thread); @@ -101,36 +127,37 @@ class FileSystemOperations DWORD uParam; char strParam[4]; } param; - param.uParam = CLEO_GetIntOpcodeParam(thread); + param.uParam = OPCODE_READ_PARAM_INT(); strcpy(mode, param.strParam); } else { - CLEO_ReadStringOpcodeParam(thread, mode, sizeof(mode)); + OPCODE_READ_PARAM_STRING_BUFF(mode, sizeof(mode)); } // either CLEO 3 or CLEO 4 made a big 
mistake! (they differ in one major unapparent preference) // lets try to resolve this with a legacy mode bool legacy = CLEO_GetScriptVersion(thread) < CLEO_VER_4_3; - auto handle = File::open(filename.c_str(), mode, legacy); + auto handle = File::open(filename, mode, legacy); if (!File::isOk(handle)) { - CLEO_SetIntOpcodeParam(thread, NULL); - CLEO_SetThreadCondResult(thread, false); + OPCODE_WRITE_PARAM_INT(0); + OPCODE_CONDITION_RESULT(false); return OR_CONTINUE; } m_hFiles.insert(handle); - CLEO_SetIntOpcodeParam(thread, handle); - CLEO_SetThreadCondResult(thread, true); + + OPCODE_WRITE_PARAM_INT(handle); + OPCODE_CONDITION_RESULT(true); return OR_CONTINUE; } //0A9B=1,closefile %1d% static OpcodeResult WINAPI opcode_0A9B(CRunningScript* thread) { - DWORD handle = READ_HANDLE_PARAM(); + DWORD handle = READ_FILE_HANDLE_PARAM(); if (m_hFiles.find(handle) != m_hFiles.end()) { @@ -140,97 +167,149 @@ class FileSystemOperations return OR_CONTINUE; } - //0A9C=2,%2d% = file %1d% size + //0A9C=2,get_file_size %1d% store_to %2d% static OpcodeResult WINAPI opcode_0A9C(CRunningScript* thread) { - DWORD handle = READ_HANDLE_PARAM(); + DWORD handle = READ_FILE_HANDLE_PARAM(); auto size = File::getSize(handle); - CLEO_SetIntOpcodeParam(thread, size); + + OPCODE_WRITE_PARAM_INT(size); return OR_CONTINUE; } - //0A9D=3,readfile %1d% size %2d% to %3d% + //0A9D=3,read_from_file %1d% size %2d% store_to %3d% static OpcodeResult WINAPI opcode_0A9D(CRunningScript* thread) { - DWORD handle = READ_HANDLE_PARAM(); - DWORD size = CLEO_GetIntOpcodeParam(thread); - SCRIPT_VAR* buffer = CLEO_GetPointerToScriptVariable(thread); + auto handle = READ_FILE_HANDLE_PARAM(); + auto size = OPCODE_READ_PARAM_INT(); + auto destination = OPCODE_READ_PARAM_OUTPUT_VAR(); + + if (size < 0) + { + SHOW_ERROR("Invalid '%d' size argument in script %s\nScript suspended.", size, ScriptInfoStr(thread).c_str()); + return thread->Suspend(); + } - buffer->dwParam = 0; // 
https://github.com/cleolibrary/CLEO4/issues/91 - File::read(handle, buffer, size); + destination->dwParam = 0; // clear not overwritten bytes - https://github.com/cleolibrary/CLEO4/issues/91 + if (size > 0) File::read(handle, destination, size); return OR_CONTINUE; } - //0A9E=3,writefile %1d% size %2d% from %3d% + //0A9E=3,write_to_file %1d% size %2d% from %3d% static OpcodeResult WINAPI opcode_0A9E(CRunningScript* thread) { - DWORD handle = READ_HANDLE_PARAM(); - DWORD size = CLEO_GetIntOpcodeParam(thread); - SCRIPT_VAR* buffer = CLEO_GetPointerToScriptVariable(thread); + auto handle = READ_FILE_HANDLE_PARAM(); + auto size = OPCODE_READ_PARAM_INT(); + + if (size < 0) + { + SHOW_ERROR("Invalid '%d' size argument in script %s\nScript suspended.", size, ScriptInfoStr(thread).c_str()); + return thread->Suspend(); + } + + if (size == 0) + { + return OR_CONTINUE; // done + } + + const void* source; + auto paramType = thread->PeekDataType(); + if (IsVariable(paramType)) + { + source = CLEO_GetPointerToScriptVariable(thread); + } + else if(IsImmString(paramType) || IsVarString(paramType)) + { + static char buffer[MAX_STR_LEN]; + + if (size > MAX_STR_LEN) + { + SHOW_ERROR("Size argument (%d) greater than supported (%d) in script %s\nScript suspended.", size, MAX_STR_LEN, ScriptInfoStr(thread).c_str()); + return thread->Suspend(); + } + + ZeroMemory(buffer, size); // padd with zeros if size > length + source = CLEO_ReadStringOpcodeParam(thread, buffer, sizeof(buffer)); + } + else + { + if (size > sizeof(SCRIPT_VAR)) + { + SHOW_ERROR("Size argument (%d) greater than supported (%d) in script %s\nScript suspended.", size, sizeof(SCRIPT_VAR), ScriptInfoStr(thread).c_str()); + return thread->Suspend(); + } - File::write(handle, buffer, size); + CLEO_RetrieveOpcodeParams(thread, 1); + source = CLEO_GetOpcodeParamsArray(); + } + + File::write(handle, source, size); if (File::isOk(handle)) File::flush(handle); return OR_CONTINUE; } - // 0AAB=1, file_exists %1s% + // 0AAB=1, 
does_file_exist %1s% static OpcodeResult WINAPI Script_FS_FileExists(CRunningScript* thread) { - auto filename = ReadPathParam(thread); + auto filename = OPCODE_READ_PARAM_FILEPATH(); - DWORD fAttr = GetFileAttributes(filename.c_str()); + DWORD fAttr = GetFileAttributes(filename); bool exists = (fAttr != INVALID_FILE_ATTRIBUTES) && !(fAttr & FILE_ATTRIBUTE_DIRECTORY); - CLEO_SetThreadCondResult(thread, exists); + OPCODE_CONDITION_RESULT(exists); return OR_CONTINUE; } - //0AD5=3,file %1d% seek %2d% from_origin %3d% //IF and SET + //0AD5=3, file_seek %1d% offset %2d% origin %3d% //IF and SET static OpcodeResult WINAPI opcode_0AD5(CRunningScript* thread) { - DWORD handle = READ_HANDLE_PARAM(); - int offset = (int)CLEO_GetIntOpcodeParam(thread); - DWORD origin = CLEO_GetIntOpcodeParam(thread); + auto handle = READ_FILE_HANDLE_PARAM(); + auto offset = OPCODE_READ_PARAM_INT(); + auto origin = OPCODE_READ_PARAM_UINT(); + + auto ok = File::seek(handle, offset, origin); - bool ok = File::seek(handle, offset, origin); - CLEO_SetThreadCondResult(thread, ok); + OPCODE_CONDITION_RESULT(ok); return OR_CONTINUE; } - //0AD6=1,end_of_file %1d% reached + //0AD6=1, is_end_of_file_reached %1d% static OpcodeResult WINAPI opcode_0AD6(CRunningScript* thread) { - DWORD handle = READ_HANDLE_PARAM(); + auto handle = READ_FILE_HANDLE_PARAM(); bool end = !File::isOk(handle) || File::isEndOfFile(handle); - CLEO_SetThreadCondResult(thread, end); + + OPCODE_CONDITION_RESULT(end); return OR_CONTINUE; } - //0AD7=3,read_string_from_file %1d% to %2d% size %3d% //IF and SET + //0AD7=3, read_string_from_file %1d% to %2d% size %3d% //IF and SET static OpcodeResult WINAPI opcode_0AD7(CRunningScript* thread) { - DWORD handle = READ_HANDLE_PARAM(); + auto handle = READ_FILE_HANDLE_PARAM(); char* buffer = nullptr; int bufferSize = 0; DWORD needsTerminator = TRUE; CLEO_ReadStringParamWriteBuffer(thread, &buffer, &bufferSize, &needsTerminator); - int size = CLEO_GetIntOpcodeParam(thread); + auto size = 
OPCODE_READ_PARAM_INT(); + + if (size < 0) + { + auto info = ScriptInfoStr(thread); + SHOW_ERROR("Invalid size argument (%d) in script %s\nScript suspended.", size, info.c_str()); + return thread->Suspend(); + } + if (size == 0) { if (bufferSize > 0) buffer[0] = '\0'; - CLEO_SetThreadCondResult(thread, false); + OPCODE_CONDITION_RESULT(false); return OR_CONTINUE; } - if (size < 0) - { - auto info = scriptInfoStr(thread); - SHOW_ERROR("Invalid size argument (%d) in opcode [0AD7] in script %s\nScript suspended.", size, info.c_str()); - return thread->Suspend(); - } std::vector tmpBuff; tmpBuff.resize(size); @@ -239,7 +318,7 @@ class FileSystemOperations bool ok = File::readString(handle, data, size) != nullptr; if(!ok) { - CLEO_SetThreadCondResult(thread, false); + OPCODE_CONDITION_RESULT(false); return OR_CONTINUE; } @@ -250,33 +329,33 @@ class FileSystemOperations memcpy(buffer, data, resultSize); if(resultSize < bufferSize) buffer[resultSize] = '\0'; // terminate string whenever possible - CLEO_SetThreadCondResult(thread, true); + OPCODE_CONDITION_RESULT(true); return OR_CONTINUE; } - //0AD8=2,write_string_to_file %1d% from %2d% //IF and SET + //0AD8=2, write_string_to_file %1d% from %2d% //IF and SET static OpcodeResult WINAPI opcode_0AD8(CRunningScript* thread) { - DWORD handle = READ_HANDLE_PARAM(); - auto text = CLEO_ReadStringOpcodeParam(thread); + auto handle = READ_FILE_HANDLE_PARAM(); + auto text = OPCODE_READ_PARAM_STRING(); auto ok = File::writeString(handle, text); if (!ok) { - CLEO_SetThreadCondResult(thread, false); + OPCODE_CONDITION_RESULT(false); return OR_CONTINUE; } File::flush(handle); - CLEO_SetThreadCondResult(thread, true); + OPCODE_CONDITION_RESULT(true); return OR_CONTINUE; } //0AD9=-1,write_formated_text %2d% to_file %1d% static OpcodeResult WINAPI opcode_0AD9(CRunningScript* thread) { - DWORD handle = READ_HANDLE_PARAM(); - auto format = CLEO_ReadStringOpcodeParam(thread); + auto handle = READ_FILE_HANDLE_PARAM(); + auto format = 
OPCODE_READ_PARAM_STRING(); static char text[4 * MAX_STR_LEN]; CLEO_ReadParamsFormatted(thread, format, text, MAX_STR_LEN); auto ok = File::writeString(handle, text); @@ -289,12 +368,12 @@ class FileSystemOperations return OR_CONTINUE; } - //0ADA=-1,%3d% = scan_file %1d% format %2d% //IF and SET + //0ADA=-1, %3d% = scan_file %1d% format %2d% //IF and SET static OpcodeResult WINAPI opcode_0ADA(CRunningScript* thread) { - DWORD handle = READ_HANDLE_PARAM(); - auto format = CLEO_ReadStringOpcodeParam(thread); - auto result = (DWORD*)CLEO_GetPointerToScriptVariable(thread); + auto handle = READ_FILE_HANDLE_PARAM(); + auto format = OPCODE_READ_PARAM_STRING(); + auto result = OPCODE_READ_PARAM_OUTPUT_VAR(); size_t paramCount = 0; SCRIPT_VAR* outputParams[35]; @@ -305,146 +384,94 @@ class FileSystemOperations } CLEO_SkipUnusedVarArgs(thread); // var arg terminator - *result = File::scan(handle, format, (void**)&outputParams); - - //CLEO_SetThreadCondResult(thread, paramCount == *result); - CLEO_SetThreadCondResult(thread, true); - return OR_CONTINUE; - } - - //2300=2,get_file_position %1d% store_to %2d% - static OpcodeResult WINAPI opcode_2300(CRunningScript* thread) - { - DWORD handle = READ_HANDLE_PARAM(); - - auto pos = File::getPos(handle); - CLEO_SetIntOpcodeParam(thread, pos); - return OR_CONTINUE; - } - - //2301=3,read_block_from_file %1d% size %2d% buffer %3d% // IF and SET - static OpcodeResult WINAPI opcode_2301(CRunningScript* thread) - { - DWORD handle = READ_HANDLE_PARAM(); - DWORD size = CLEO_GetIntOpcodeParam(thread); - - auto paramType = CLEO_GetOperandType(thread); - if(!IsImmInteger(paramType) && !IsVariable(paramType)) - { - auto info = scriptInfoStr(thread); - SHOW_ERROR("Invalid type (0x%02X) of 'address' argument in opcode [2301] in script %s\nScript suspended.", paramType, info.c_str()); - return thread->Suspend(); - } - DWORD target = CLEO_GetIntOpcodeParam(thread); OPCODE_VALIDATE_POINTER(target) - - if(size < 0) - { - auto info = 
scriptInfoStr(thread); - SHOW_ERROR("Invalid size argument (%d) in opcode [2301] in script %s\nScript suspended.", size, info.c_str()); - return thread->Suspend(); - } - - if (size == 0) - { - CLEO_SetThreadCondResult(thread, true); // done - return OR_CONTINUE; - } - - auto readCount = File::read(handle, (void*)target, size); - if (readCount != size) - { - CLEO_SetThreadCondResult(thread, false); - return OR_CONTINUE; - } + result->dwParam = File::scan(handle, format, (void**)&outputParams); - CLEO_SetThreadCondResult(thread, true); + //OPCODE_CONDITION_RESULT(paramCount == result->dwParam); + OPCODE_CONDITION_RESULT(true); return OR_CONTINUE; } - // 0AE4=1, directory_exist %1s% + // 0AE4=1, directory_exist %1s% static OpcodeResult WINAPI Script_FS_DirectoryExists(CRunningScript* thread) { - auto filename = ReadPathParam(thread); + auto filename = OPCODE_READ_PARAM_FILEPATH(); - DWORD fAttr = GetFileAttributes(filename.c_str()); + DWORD fAttr = GetFileAttributes(filename); bool exists = (fAttr != INVALID_FILE_ATTRIBUTES) && (fAttr & FILE_ATTRIBUTE_DIRECTORY); - CLEO_SetThreadCondResult(thread, exists); + OPCODE_CONDITION_RESULT(exists); return OR_CONTINUE; } - // 0AE5=1, create_directory %1s% //IF and SET + // 0AE5=1, create_directory %1s% //IF and SET static OpcodeResult WINAPI Script_FS_CreateDirectory(CRunningScript* thread) { - auto filename = ReadPathParam(thread); + auto filename = OPCODE_READ_PARAM_FILEPATH(); - bool result = CreateDirectory(filename.c_str(), NULL) != 0; + bool result = CreateDirectory(filename, NULL) != 0; - CLEO_SetThreadCondResult(thread, result); + OPCODE_CONDITION_RESULT(result); return OR_CONTINUE; } - // 0AE6=3, %2d% = find_first_file %1s% get_filename_to %3s% //IF and SET + // 0AE6=3, %2d% = find_first_file %1s% get_filename_to %3s% //IF and SET static OpcodeResult WINAPI Script_FS_FindFirstFile(CRunningScript* thread) { - auto filename = ReadPathParam(thread); - WIN32_FIND_DATA ffd = { 0 }; - HANDLE handle = 
FindFirstFile(filename.c_str(), &ffd); + auto filename = OPCODE_READ_PARAM_FILEPATH(); - CLEO_SetIntOpcodeParam(thread, (DWORD)handle); + WIN32_FIND_DATA ffd = { 0 }; + HANDLE handle = FindFirstFile(filename, &ffd); - if (handle != INVALID_HANDLE_VALUE) + if (handle == INVALID_HANDLE_VALUE) // -1 { - m_hFileSearches.insert(handle); - - CLEO_WriteStringOpcodeParam(thread, ffd.cFileName); - CLEO_SetThreadCondResult(thread, true); - } - else - { - CLEO_SkipOpcodeParams(thread, 1); - CLEO_SetThreadCondResult(thread, false); + OPCODE_WRITE_PARAM_INT(-1); // invalid handle + OPCODE_SKIP_PARAMS(1); // filename + OPCODE_CONDITION_RESULT(false); + return OR_CONTINUE; } + + m_hFileSearches.insert(handle); + + OPCODE_WRITE_PARAM_INT(handle); + OPCODE_WRITE_PARAM_STRING(ffd.cFileName); + OPCODE_CONDITION_RESULT(true); return OR_CONTINUE; } // 0AE7=2,%2s% = find_next_file %1d% //IF and SET static OpcodeResult WINAPI Script_FS_FindNextFile(CRunningScript* thread) { - auto handle = (HANDLE)CLEO_GetIntOpcodeParam(thread); + auto handle = (HANDLE)OPCODE_READ_PARAM_INT(); if (m_hFileSearches.find(handle) == m_hFileSearches.end()) { - auto info = scriptInfoStr(thread); - LOG_WARNING(thread, "[0AE7] used with handle (0x%X) to unknown or already closed file search in script %s", handle, info.c_str()); - CLEO_SkipOpcodeParams(thread, 1); - CLEO_SetThreadCondResult(thread, false); + LOG_WARNING(thread, "Invalid or already closed file search handle (0x%X) in script %s", handle, ScriptInfoStr(thread).c_str()); + OPCODE_SKIP_PARAMS(1); + OPCODE_CONDITION_RESULT(false); return OR_CONTINUE; } WIN32_FIND_DATA ffd = { 0 }; - if (FindNextFile(handle, &ffd)) + if (!FindNextFile(handle, &ffd)) { - CLEO_WriteStringOpcodeParam(thread, ffd.cFileName); - CLEO_SetThreadCondResult(thread, true); - } - else - { - CLEO_SkipOpcodeParams(thread, 1); - CLEO_SetThreadCondResult(thread, false); + OPCODE_SKIP_PARAMS(1); + OPCODE_CONDITION_RESULT(false); + return OR_CONTINUE; } + + 
OPCODE_WRITE_PARAM_STRING(ffd.cFileName); + OPCODE_CONDITION_RESULT(true); return OR_CONTINUE; } - // 0AE8=1, find_close %1d% + // 0AE8=1,find_close %1d% static OpcodeResult WINAPI Script_FS_FindClose(CRunningScript* thread) { - auto handle = (HANDLE)CLEO_GetIntOpcodeParam(thread); + auto handle = (HANDLE)OPCODE_READ_PARAM_INT(); if (m_hFileSearches.find(handle) == m_hFileSearches.end()) { - auto info = scriptInfoStr(thread); - LOG_WARNING(thread, "[0AE8] used with handle (0x%X) to unknown or already closed file search in script %s", handle, info.c_str()); + LOG_WARNING(thread, "Invalid or already closed file search handle (0x%X) in script %s", handle, ScriptInfoStr(thread).c_str()); return OR_CONTINUE; } @@ -453,13 +480,14 @@ class FileSystemOperations return OR_CONTINUE; } - // 0B00=1, delete_file %1s% //IF and SET + // 0B00=1, delete_file %1s% //IF and SET static OpcodeResult WINAPI Script_FS_DeleteFile(CScriptThread* thread) { - auto filename = ReadPathParam(thread); + auto filename = OPCODE_READ_PARAM_FILEPATH(); - auto success = DeleteFile(filename.c_str()); - CLEO_SetThreadCondResult(thread, success); + auto success = DeleteFile(filename); + + OPCODE_CONDITION_RESULT(success); return OR_CONTINUE; } @@ -511,68 +539,74 @@ class FileSystemOperations // 0B01=1, delete_directory %1s% with_all_files_and_subdirectories %2d% //IF and SET static OpcodeResult WINAPI Script_FS_DeleteDirectory(CScriptThread* thread) { - auto dirpath = ReadPathParam(thread); - int DeleteAllInsideFlag = CLEO_GetIntOpcodeParam(thread); + auto filename = OPCODE_READ_PARAM_FILEPATH(); + auto deleteContents = OPCODE_READ_PARAM_BOOL(); BOOL result; - if (DeleteAllInsideFlag) + if (deleteContents) { // remove directory with all files and subdirectories - result = DeleteDir(dirpath.c_str()); + result = DeleteDir(filename); } else { // try to remove as empty directory - result = RemoveDirectory(dirpath.c_str()); + result = RemoveDirectory(filename); } - CLEO_SetThreadCondResult(thread, result); + 
OPCODE_CONDITION_RESULT(result); return OR_CONTINUE; } // 0B02=2, move_file %1s% to %2s% //IF and SET static OpcodeResult WINAPI Script_FS_MoveFile(CScriptThread* thread) { - auto filepath = ReadPathParam(thread); - auto newFilepath = ReadPathParam(thread); + auto tmpStr = OPCODE_READ_PARAM_FILEPATH(); + auto filepath = std::string(tmpStr); // store before reusing buffer + + auto newFilepath = OPCODE_READ_PARAM_FILEPATH(); BOOL result = GetFileAttributes(filepath.c_str()) & FILE_ATTRIBUTE_DIRECTORY; if (!result) - result = MoveFile(filepath.c_str(), newFilepath.c_str()); + result = MoveFile(filepath.c_str(), newFilepath); - CLEO_SetThreadCondResult(thread, result); + OPCODE_CONDITION_RESULT(result); return OR_CONTINUE; } // 0B03=2, move_directory %1s% to %2s% //IF and SET static OpcodeResult WINAPI Script_FS_MoveDir(CScriptThread* thread) { - auto filepath = ReadPathParam(thread); - auto newFilepath = ReadPathParam(thread); + auto tmpStr = OPCODE_READ_PARAM_FILEPATH(); + auto filepath = std::string(tmpStr); // store before reusing buffer + + auto newFilepath = OPCODE_READ_PARAM_FILEPATH(); BOOL result = GetFileAttributes(filepath.c_str()) & FILE_ATTRIBUTE_DIRECTORY; if (result) - result = MoveFile(filepath.c_str(), newFilepath.c_str()); + result = MoveFile(filepath.c_str(), newFilepath); - CLEO_SetThreadCondResult(thread, result); + OPCODE_CONDITION_RESULT(result); return OR_CONTINUE; } // 0B04=2, copy_file %1s% to %2s% //IF and SET static OpcodeResult WINAPI Script_FS_CopyFile(CScriptThread* thread) { - auto filepath = ReadPathParam(thread); - auto newFilepath = ReadPathParam(thread); + auto tmpStr = OPCODE_READ_PARAM_FILEPATH(); + auto filepath = std::string(tmpStr); // store before reusing buffer - BOOL result = CopyFile(filepath.c_str(), newFilepath.c_str(), FALSE); + auto newFilepath = OPCODE_READ_PARAM_FILEPATH(); + + BOOL result = CopyFile(filepath.c_str(), newFilepath, FALSE); if (result) { // copy file attributes DWORD fattr = 
GetFileAttributes(filepath.c_str()); - SetFileAttributes(newFilepath.c_str(), fattr); + SetFileAttributes(newFilepath, fattr); } - CLEO_SetThreadCondResult(thread, result); + OPCODE_CONDITION_RESULT(result); return OR_CONTINUE; } @@ -624,19 +658,111 @@ class FileSystemOperations FindClose(hSearch); } - //return success return TRUE; } - // 0B05=2, copy_directory %1d% to %2d% //IF and SET + // 0B05=2, copy_directory %1d% to %2d% //IF and SET static OpcodeResult WINAPI Script_FS_CopyDir(CScriptThread* thread) { - auto filepath = ReadPathParam(thread); - auto newFilepath = ReadPathParam(thread); + auto tmpStr = OPCODE_READ_PARAM_FILEPATH(); + auto filepath = std::string(tmpStr); // store before reusing buffer + + auto newFilepath = OPCODE_READ_PARAM_FILEPATH(); + + BOOL result = CopyDir(filepath.c_str(), newFilepath); - BOOL result = CopyDir(filepath.c_str(), newFilepath.c_str()); + OPCODE_CONDITION_RESULT(result); + return OR_CONTINUE; + } + + //2300=2,get_file_position %1d% store_to %2d% + static OpcodeResult WINAPI opcode_2300(CRunningScript* thread) + { + auto handle = READ_FILE_HANDLE_PARAM(); + + auto pos = File::getPos(handle); + + OPCODE_WRITE_PARAM_INT(pos); + return OR_CONTINUE; + } + + //2301=3,read_block_from_file %1d% size %2d% buffer %3d% // IF and SET + static OpcodeResult WINAPI opcode_2301(CRunningScript* thread) + { + auto handle = READ_FILE_HANDLE_PARAM(); + auto size = OPCODE_READ_PARAM_INT(); + auto destination = OPCODE_READ_PARAM_PTR(); + + if (size < 0) + { + auto info = ScriptInfoStr(thread); + SHOW_ERROR("Invalid size argument (%d) in script %s\nScript suspended.", size, info.c_str()); + return thread->Suspend(); + } + + if (size == 0) + { + OPCODE_CONDITION_RESULT(true); // done + return OR_CONTINUE; + } + + auto readCount = File::read(handle, destination, size); + if (readCount != size) + { + OPCODE_CONDITION_RESULT(false); + return OR_CONTINUE; + } + + OPCODE_CONDITION_RESULT(true); + return OR_CONTINUE; + } + + //2302=2,%2s% = 
resolve_filepath %1s% + static OpcodeResult __stdcall opcode_2302(CRunningScript* thread) + { + auto path = OPCODE_READ_PARAM_FILEPATH(); // it also resolves the path to absolute form + + OPCODE_WRITE_PARAM_STRING(path); + return OR_CONTINUE; + } + + //2303=3,%3s% = get_script_filename %1d% full_path %2d% // IF and SET + static OpcodeResult __stdcall opcode_2303(CRunningScript* thread) + { + auto script = OPCODE_READ_PARAM_INT(); + auto fullPath = OPCODE_READ_PARAM_BOOL(); + + if (script == -1) // special case: current script + { + script = (int)thread; + } + else + { + OPCODE_VALIDATE_POINTER(script); + } + + const char* filename = CLEO_GetScriptFilename((CRunningScript*)script); + if (filename == nullptr) + { + OPCODE_SKIP_PARAMS(1); + OPCODE_CONDITION_RESULT(false); + return OR_CONTINUE; + } + + if (!fullPath) + { + OPCODE_WRITE_PARAM_STRING(filename); + } + else + { + std::string absolute = ".\\"; + absolute += filename; + absolute.resize(MAX_STR_LEN); + CLEO_ResolvePath((CRunningScript*)script, absolute.data(), MAX_STR_LEN); + OPCODE_WRITE_PARAM_STRING(absolute.c_str()); + } - CLEO_SetThreadCondResult(thread, result); + OPCODE_CONDITION_RESULT(true); return OR_CONTINUE; } } fileSystemOperations; diff --git a/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj b/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj index a338af16..5129493b 100644 --- a/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj @@ -45,13 +45,13 @@ $(SolutionDir).output\ $(ProjectDir).obj\$(Configuration)\ FileSystemOperations - .cleo + .cleo5 $(SolutionDir).output\ $(ProjectDir).obj\$(Configuration)\ FileSystemOperations - .cleo + .cleo5 $(GTA_SA_DIR)\gta_sa.exe @@ -67,7 +67,7 @@ true true MultiThreaded - _NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San 
Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions) + _NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions);TARGET_NAME=R"($(TargetName))" /Zc:threadSafeInit- %(AdditionalOptions) $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared;$(PLUGIN_SDK_DIR)\shared\game;$(SolutionDir)..\cleo_sdk;%(AdditionalIncludeDirectories) stdcpp17 @@ -92,7 +92,7 @@ xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" Disabled true MultiThreadedDebug - _DEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions) + _DEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions);TARGET_NAME=R"($(TargetName))" /Zc:threadSafeInit- %(AdditionalOptions) $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared;$(PLUGIN_SDK_DIR)\shared\game;$(SolutionDir)..\cleo_sdk;%(AdditionalIncludeDirectories) stdcpp17 diff --git a/cleo_plugins/IniFiles/IniFiles.cpp b/cleo_plugins/IniFiles/IniFiles.cpp index 8e9e1b9d..2daff3dd 100644 --- a/cleo_plugins/IniFiles/IniFiles.cpp +++ b/cleo_plugins/IniFiles/IniFiles.cpp @@ -24,7 +24,7 @@ class IniFiles { std::string err(128, '\0'); sprintf(err.data(), "This plugin requires version %X or later! 
\nCurrent version of CLEO is %X.", CLEO_VERSION >> 8, cleoVer >> 8); - MessageBox(HWND_DESKTOP, err.data(), "IniFiles.cleo", MB_SYSTEMMODAL | MB_ICONERROR); + MessageBox(HWND_DESKTOP, err.data(), TARGET_NAME, MB_SYSTEMMODAL | MB_ICONERROR); } } diff --git a/cleo_plugins/IniFiles/IniFiles.vcxproj b/cleo_plugins/IniFiles/IniFiles.vcxproj index 7a80d178..32eb6071 100644 --- a/cleo_plugins/IniFiles/IniFiles.vcxproj +++ b/cleo_plugins/IniFiles/IniFiles.vcxproj @@ -45,13 +45,13 @@ $(SolutionDir).output\ $(ProjectDir).obj\$(Configuration)\ IniFiles - .cleo + .cleo5 $(SolutionDir).output\ $(ProjectDir).obj\$(Configuration)\ IniFiles - .cleo + .cleo5 $(GTA_SA_DIR)\gta_sa.exe @@ -68,7 +68,7 @@ true MultiThreaded $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared;$(PLUGIN_SDK_DIR)\shared\game;$(SolutionDir)..\cleo_sdk - _NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions) + _NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions);TARGET_NAME=R"($(TargetName))" /Zc:threadSafeInit- %(AdditionalOptions) stdcpp17 @@ -95,7 +95,7 @@ if defined GTA_SA_DIR ( true MultiThreadedDebug $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared;$(PLUGIN_SDK_DIR)\shared\game;$(SolutionDir)..\cleo_sdk - _DEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions) + _DEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San 
Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions);TARGET_NAME=R"($(TargetName))" /Zc:threadSafeInit- %(AdditionalOptions) stdcpp17 diff --git a/cleo_plugins/IntOperations/IntOperations.cpp b/cleo_plugins/IntOperations/IntOperations.cpp index 4b1a8f0f..febd6a80 100644 --- a/cleo_plugins/IntOperations/IntOperations.cpp +++ b/cleo_plugins/IntOperations/IntOperations.cpp @@ -1,5 +1,6 @@ #include "plugin.h" #include "CLEO.h" +#include "CLEO_Utils.h" using namespace CLEO; using namespace plugin; @@ -10,31 +11,29 @@ class IntOperations IntOperations() { auto cleoVer = CLEO_GetVersion(); - if (cleoVer >= CLEO_VERSION) + if (cleoVer < CLEO_VERSION) { - //register opcodes - CLEO_RegisterOpcode(0x0B10, Script_IntOp_AND); - CLEO_RegisterOpcode(0x0B11, Script_IntOp_OR); - CLEO_RegisterOpcode(0x0B12, Script_IntOp_XOR); - CLEO_RegisterOpcode(0x0B13, Script_IntOp_NOT); - CLEO_RegisterOpcode(0x0B14, Script_IntOp_MOD); - CLEO_RegisterOpcode(0x0B15, Script_IntOp_SHR); - CLEO_RegisterOpcode(0x0B16, Script_IntOp_SHL); - CLEO_RegisterOpcode(0x0B17, Scr_IntOp_AND); - CLEO_RegisterOpcode(0x0B18, Scr_IntOp_OR); - CLEO_RegisterOpcode(0x0B19, Scr_IntOp_XOR); - CLEO_RegisterOpcode(0x0B1A, Scr_IntOp_NOT); - CLEO_RegisterOpcode(0x0B1B, Scr_IntOp_MOD); - CLEO_RegisterOpcode(0x0B1C, Scr_IntOp_SHR); - CLEO_RegisterOpcode(0x0B1D, Scr_IntOp_SHL); - CLEO_RegisterOpcode(0x0B1E, Sign_Extend); - } - else - { - std::string err(128, '\0'); - sprintf(err.data(), "This plugin requires version %X or later! \nCurrent version of CLEO is %X.", CLEO_VERSION >> 8, cleoVer >> 8); - MessageBox(HWND_DESKTOP, err.data(), "IntOperations.cleo", MB_SYSTEMMODAL | MB_ICONERROR); + auto err = StringPrintf("This plugin requires version %X or later! 
\nCurrent version of CLEO is %X.", CLEO_VERSION >> 8, cleoVer >> 8); + MessageBox(HWND_DESKTOP, err.c_str(), TARGET_NAME, MB_SYSTEMMODAL | MB_ICONERROR); + return; } + + //register opcodes + CLEO_RegisterOpcode(0x0B10, Script_IntOp_AND); + CLEO_RegisterOpcode(0x0B11, Script_IntOp_OR); + CLEO_RegisterOpcode(0x0B12, Script_IntOp_XOR); + CLEO_RegisterOpcode(0x0B13, Script_IntOp_NOT); + CLEO_RegisterOpcode(0x0B14, Script_IntOp_MOD); + CLEO_RegisterOpcode(0x0B15, Script_IntOp_SHR); + CLEO_RegisterOpcode(0x0B16, Script_IntOp_SHL); + CLEO_RegisterOpcode(0x0B17, Scr_IntOp_AND); + CLEO_RegisterOpcode(0x0B18, Scr_IntOp_OR); + CLEO_RegisterOpcode(0x0B19, Scr_IntOp_XOR); + CLEO_RegisterOpcode(0x0B1A, Scr_IntOp_NOT); + CLEO_RegisterOpcode(0x0B1B, Scr_IntOp_MOD); + CLEO_RegisterOpcode(0x0B1C, Scr_IntOp_SHR); + CLEO_RegisterOpcode(0x0B1D, Scr_IntOp_SHL); + CLEO_RegisterOpcode(0x0B1E, Sign_Extend); } static OpcodeResult WINAPI Script_IntOp_AND(CScriptThread* thread) @@ -43,11 +42,12 @@ class IntOperations 0B10=3,%3d% = %1d% AND %2d% ****************************************************************/ { - int a = CLEO_GetIntOpcodeParam(thread); - int b = CLEO_GetIntOpcodeParam(thread); + auto a = OPCODE_READ_PARAM_INT(); + auto b = OPCODE_READ_PARAM_INT(); - CLEO_SetIntOpcodeParam(thread, a & b); + auto result = a & b; + OPCODE_WRITE_PARAM_INT(result); return OR_CONTINUE; } @@ -57,11 +57,12 @@ class IntOperations 0B11=3,%3d% = %1d% OR %2d% ****************************************************************/ { - int a = CLEO_GetIntOpcodeParam(thread); - int b = CLEO_GetIntOpcodeParam(thread); + auto a = OPCODE_READ_PARAM_INT(); + auto b = OPCODE_READ_PARAM_INT(); - CLEO_SetIntOpcodeParam(thread, a | b); + auto result = a | b; + OPCODE_WRITE_PARAM_INT(result); return OR_CONTINUE; } @@ -71,11 +72,12 @@ class IntOperations 0B12=3,%3d% = %1d% XOR %2d% ****************************************************************/ { - int a = CLEO_GetIntOpcodeParam(thread); - int b = 
CLEO_GetIntOpcodeParam(thread); + auto a = OPCODE_READ_PARAM_INT(); + auto b = OPCODE_READ_PARAM_INT(); - CLEO_SetIntOpcodeParam(thread, a ^ b); + auto result = a ^ b; + OPCODE_WRITE_PARAM_INT(result); return OR_CONTINUE; } @@ -85,10 +87,9 @@ class IntOperations 0B13=2,%2d% = NOT %1d% ****************************************************************/ { - int a = CLEO_GetIntOpcodeParam(thread); - - CLEO_SetIntOpcodeParam(thread, ~a); + auto a = OPCODE_READ_PARAM_INT(); + OPCODE_WRITE_PARAM_INT(~a); return OR_CONTINUE; } @@ -98,11 +99,12 @@ class IntOperations 0B14=3,%3d% = %1d% MOD %2d% ****************************************************************/ { - int a = CLEO_GetIntOpcodeParam(thread); - int b = CLEO_GetIntOpcodeParam(thread); + auto a = OPCODE_READ_PARAM_INT(); + auto b = OPCODE_READ_PARAM_INT(); - CLEO_SetIntOpcodeParam(thread, a % b); + auto result = a % b; + OPCODE_WRITE_PARAM_INT(result); return OR_CONTINUE; } @@ -112,11 +114,12 @@ class IntOperations 0B15=3,%3d% = %1d% SHR %2d% ****************************************************************/ { - int a = CLEO_GetIntOpcodeParam(thread); - int b = CLEO_GetIntOpcodeParam(thread); + auto a = OPCODE_READ_PARAM_INT(); + auto b = OPCODE_READ_PARAM_INT(); - CLEO_SetIntOpcodeParam(thread, a >> b); + auto result = a >> b; + OPCODE_WRITE_PARAM_INT(result); return OR_CONTINUE; } @@ -126,11 +129,12 @@ class IntOperations 0B16=3,%3d% = %1d% SHL %2d% ****************************************************************/ { - int a = CLEO_GetIntOpcodeParam(thread); - int b = CLEO_GetIntOpcodeParam(thread); + auto a = OPCODE_READ_PARAM_INT(); + auto b = OPCODE_READ_PARAM_INT(); - CLEO_SetIntOpcodeParam(thread, a << b); + auto result = a << b; + OPCODE_WRITE_PARAM_INT(result); return OR_CONTINUE; } @@ -144,9 +148,10 @@ class IntOperations 0B17=2,%1d% &= %2d% ****************************************************************/ { - SCRIPT_VAR * op = CLEO_GetPointerToScriptVariable(thread); - int val = 
CLEO_GetIntOpcodeParam(thread); - op->dwParam &= val; + auto operand = OPCODE_READ_PARAM_OUTPUT_VAR_INT(); + auto value = OPCODE_READ_PARAM_INT(); + + operand->dwParam &= value; return OR_CONTINUE; } @@ -156,9 +161,10 @@ class IntOperations 0B18=2,%1d% |= %2d% ****************************************************************/ { - SCRIPT_VAR * op = CLEO_GetPointerToScriptVariable(thread); - int val = CLEO_GetIntOpcodeParam(thread); - op->dwParam |= val; + auto operand = OPCODE_READ_PARAM_OUTPUT_VAR_INT(); + auto value = OPCODE_READ_PARAM_INT(); + + operand->dwParam |= value; return OR_CONTINUE; } @@ -168,9 +174,10 @@ class IntOperations 0B19=2,%1d% ^= %2d% ****************************************************************/ { - SCRIPT_VAR * op = CLEO_GetPointerToScriptVariable(thread); - int val = CLEO_GetIntOpcodeParam(thread); - op->dwParam ^= val; + auto operand = OPCODE_READ_PARAM_OUTPUT_VAR_INT(); + auto value = OPCODE_READ_PARAM_INT(); + + operand->dwParam ^= value; return OR_CONTINUE; } @@ -180,8 +187,9 @@ class IntOperations 0B1A=1,~%1d% ****************************************************************/ { - SCRIPT_VAR * op = CLEO_GetPointerToScriptVariable(thread); - op->dwParam = ~op->dwParam; + auto operand = OPCODE_READ_PARAM_OUTPUT_VAR_INT(); + + operand->dwParam = ~operand->dwParam; return OR_CONTINUE; } @@ -191,9 +199,10 @@ class IntOperations 0B1B=2,%1d% %= %2d% ****************************************************************/ { - SCRIPT_VAR * op = CLEO_GetPointerToScriptVariable(thread); - int val = CLEO_GetIntOpcodeParam(thread); - op->dwParam %= val; + auto operand = OPCODE_READ_PARAM_OUTPUT_VAR_INT(); + auto value = OPCODE_READ_PARAM_INT(); + + operand->dwParam %= value; return OR_CONTINUE; } @@ -203,9 +212,10 @@ class IntOperations 0B1C=2,%1d% >>= %2d% ****************************************************************/ { - SCRIPT_VAR * op = CLEO_GetPointerToScriptVariable(thread); - int val = CLEO_GetIntOpcodeParam(thread); - op->dwParam >>= val; + auto 
operand = OPCODE_READ_PARAM_OUTPUT_VAR_INT(); + auto value = OPCODE_READ_PARAM_INT(); + + operand->dwParam >>= value; return OR_CONTINUE; } @@ -215,9 +225,10 @@ class IntOperations 0B1D=2,%1d% <<= %2d% ****************************************************************/ { - SCRIPT_VAR * op = CLEO_GetPointerToScriptVariable(thread); - int val = CLEO_GetIntOpcodeParam(thread); - op->dwParam <<= val; + auto operand = OPCODE_READ_PARAM_OUTPUT_VAR_INT(); + auto value = OPCODE_READ_PARAM_INT(); + + operand->dwParam <<= value; return OR_CONTINUE; } @@ -227,18 +238,21 @@ class IntOperations 0B1E=2,sign_extend %1d% size %2d% ****************************************************************/ { - SCRIPT_VAR* op = CLEO_GetPointerToScriptVariable(thread); - int size = CLEO_GetIntOpcodeParam(thread); + auto operand = OPCODE_READ_PARAM_OUTPUT_VAR_INT(); + auto size = OPCODE_READ_PARAM_INT(); - if (size > 0 && size < 4) + if (size <= 0 || size > 4) { - size_t offset = size * 8 - 1; // bit offset of top most bit in source value - bool signBit = op->dwParam & (1 << offset); + SHOW_ERROR("Invalid '%d' size argument in script %s\nScript suspended.", size, ScriptInfoStr(thread).c_str()); + return thread->Suspend(); + } + + size_t offset = size * 8 - 1; // bit offset of top most bit in source value + bool signBit = operand->dwParam & (1 << offset); - if(signBit) - { - op->dwParam |= 0xFFFFFFFF << offset; // set all upper bits - } + if(signBit) + { + operand->dwParam |= 0xFFFFFFFF << offset; // set all upper bits } return OR_CONTINUE; diff --git a/cleo_plugins/IntOperations/IntOperations.vcxproj b/cleo_plugins/IntOperations/IntOperations.vcxproj index adfe34fa..3d0b82a1 100644 --- a/cleo_plugins/IntOperations/IntOperations.vcxproj +++ b/cleo_plugins/IntOperations/IntOperations.vcxproj @@ -45,13 +45,13 @@ $(SolutionDir).output\ $(ProjectDir).obj\$(Configuration)\ IntOperations - .cleo + .cleo5 $(SolutionDir).output\ $(ProjectDir).obj\$(Configuration)\ IntOperations - .cleo + .cleo5 
$(GTA_SA_DIR)\gta_sa.exe @@ -68,7 +68,7 @@ true MultiThreaded $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared;$(PLUGIN_SDK_DIR)\shared\game;$(SolutionDir)..\cleo_sdk - _NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions) + _NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions);TARGET_NAME=R"($(TargetName))" /Zc:threadSafeInit- %(AdditionalOptions) stdcpp17 @@ -95,7 +95,7 @@ if defined GTA_SA_DIR ( true MultiThreadedDebug $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared;$(PLUGIN_SDK_DIR)\shared\game;$(SolutionDir)..\cleo_sdk - _DEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions) + _DEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions);TARGET_NAME=R"($(TargetName))" /Zc:threadSafeInit- %(AdditionalOptions) stdcpp17 diff --git a/cleo_plugins/MemoryOperations/MemoryOperations.cpp b/cleo_plugins/MemoryOperations/MemoryOperations.cpp new file mode 100644 index 00000000..58a90f7d --- /dev/null +++ b/cleo_plugins/MemoryOperations/MemoryOperations.cpp @@ -0,0 +1,789 @@ +#include "CLEO.h" +#include "CLEO_Utils.h" +#include "plugin.h" +#include "CTheScripts.h" +#include + +using namespace CLEO; +using namespace plugin; + +class MemoryOperations +{ +public: + static 
std::set m_allocations; + static std::set m_libraries; + + MemoryOperations() + { + auto cleoVer = CLEO_GetVersion(); + if (cleoVer < CLEO_VERSION) + { + auto err = StringPrintf("This plugin requires version %X or later! \nCurrent version of CLEO is %X.", CLEO_VERSION >> 8, cleoVer >> 8); + MessageBox(HWND_DESKTOP, err.c_str(), TARGET_NAME, MB_SYSTEMMODAL | MB_ICONERROR); + return; + } + + //register opcodes + CLEO_RegisterOpcode(0x0A8C, opcode_0A8C); // write_memory + CLEO_RegisterOpcode(0x0A8D, opcode_0A8D); // read_memory + + CLEO_RegisterOpcode(0x0A96, opcode_0A96); // get_ped_pointer + CLEO_RegisterOpcode(0x0A97, opcode_0A97); // get_vehicle_pointer + CLEO_RegisterOpcode(0x0A98, opcode_0A98); // get_object_pointer + + CLEO_RegisterOpcode(0x0A9F, opcode_0A9F); // get_this_script_struct + + CLEO_RegisterOpcode(0x0AA2, opcode_0AA2); // load_dynamic_library + CLEO_RegisterOpcode(0x0AA3, opcode_0AA3); // free_library + CLEO_RegisterOpcode(0x0AA4, opcode_0AA4); // get_dynamic_library_procedure + CLEO_RegisterOpcode(0x0AA5, opcode_0AA5); // call_function + CLEO_RegisterOpcode(0x0AA6, opcode_0AA6); // call_method + CLEO_RegisterOpcode(0x0AA7, opcode_0AA7); // call_function_return + CLEO_RegisterOpcode(0x0AA8, opcode_0AA8); // call_method_return + + CLEO_RegisterOpcode(0x0AAA, opcode_0AAA); // get_script_struct_named + + CLEO_RegisterOpcode(0x0AC6, opcode_0AC6); // get_label_pointer + CLEO_RegisterOpcode(0x0AC7, opcode_0AC7); // get_var_pointer + CLEO_RegisterOpcode(0x0AC8, opcode_0AC8); // allocate_memory + CLEO_RegisterOpcode(0x0AC9, opcode_0AC9); // free_memory + + CLEO_RegisterOpcode(0x0AE9, opcode_0AE9); // pop_float + CLEO_RegisterOpcode(0x0AEA, opcode_0AEA); // get_ped_ref + CLEO_RegisterOpcode(0x0AEB, opcode_0AEB); // get_vehicle_ref + CLEO_RegisterOpcode(0x0AEC, opcode_0AEC); // get_object_ref + + CLEO_RegisterOpcode(0x2400, opcode_2400); // copy_memory + CLEO_RegisterOpcode(0x2401, opcode_2401); // read_memory_with_offset + CLEO_RegisterOpcode(0x2402,
opcode_2402); // write_memory_with_offset + CLEO_RegisterOpcode(0x2403, opcode_2403); // forget_memory + CLEO_RegisterOpcode(0x2404, opcode_2404); // get_script_struct_just_created + CLEO_RegisterOpcode(0x2405, opcode_2405); // is_script_running + CLEO_RegisterOpcode(0x2406, opcode_2406); // get_script_struct_from_filename + + + // register event callbacks + CLEO_RegisterCallback(eCallbackId::ScriptsFinalize, OnFinalizeScriptObjects); + } + + static void __stdcall OnFinalizeScriptObjects() + { + TRACE("Cleaning up %d allocated memory blocks...", m_allocations.size()); + for (auto p : m_allocations) free(p); + m_allocations.clear(); + + TRACE("Cleaning up %d loaded libraries...", m_libraries.size()); + std::for_each(m_libraries.begin(), m_libraries.end(), FreeLibrary); + m_libraries.clear(); + } + + // opcodes 0AA5 - 0AA8 + static OpcodeResult CallFunctionGeneric(CLEO::CRunningScript* thread, void* func, void* obj, int numArg, int numPop, bool returnArg) + { + int nVarArg = CLEO_GetVarArgCount(thread); + if (numArg + returnArg != nVarArg) // and return argument + { + SHOW_ERROR("Declared %d input args, but provided %d in script %s\nScript suspended.", numArg, (int)nVarArg - returnArg, CLEO::ScriptInfoStr(thread).c_str()); + return thread->Suspend(); + } + + constexpr size_t Max_Args = 32; + if (nVarArg > Max_Args) + { + SHOW_ERROR("Provided more (%d) than supported (%d) arguments in script %s\nScript suspended.", nVarArg, Max_Args, CLEO::ScriptInfoStr(thread).c_str()); + return thread->Suspend(); + } + + static SCRIPT_VAR arguments[Max_Args] = { 0 }; + SCRIPT_VAR* arguments_end = arguments + numArg; + + constexpr size_t Max_Text_Params = 5; + static char textParams[Max_Text_Params][MAX_STR_LEN]; + size_t currTextParam = 0; + + numPop *= 4; // bytes per argument + + // retrieve parameters + auto scriptParams = CLEO_GetOpcodeParamsArray(); + for (size_t i = 0; i < (size_t)numArg; i++) + { + auto& param = arguments[i]; + + auto paramType = thread->PeekDataType(); + if
(IsImmString(paramType) || IsVarString(paramType)) + { + if (currTextParam >= Max_Text_Params) + { + SHOW_ERROR("Provided more (%d) than supported (%d) string arguments in script %s\nScript suspended.", currTextParam + 1, Max_Text_Params, CLEO::ScriptInfoStr(thread).c_str()); + return thread->Suspend(); + } + + param.pcParam = OPCODE_READ_PARAM_STRING_BUFF(textParams[currTextParam], MAX_STR_LEN); + currTextParam++; + } + else if (IsImmInteger(paramType) || IsImmFloat(paramType) || IsVariable(paramType)) + { + CLEO_RetrieveOpcodeParams(thread, 1); + param = scriptParams[0]; + } + else + { + SHOW_ERROR("Invalid param type (%s) in script %s \nScript suspended.", ToKindStr(paramType), CLEO::ScriptInfoStr(thread).c_str()); + return thread->Suspend(); + } + } + + DWORD result; + _asm + { + // transfer args to stack + lea ecx, arguments + call_func_loop : + cmp ecx, arguments_end + jae call_func_loop_end + push[ecx] + add ecx, 0x4 + jmp call_func_loop + call_func_loop_end : + + // call function + mov ecx, obj + xor eax, eax + call func + mov result, eax // get result + add esp, numPop // cleanup stack + } + + if (returnArg) + { + auto paramType = thread->PeekDataType(); + + if (IsVarString(paramType)) + { + OPCODE_WRITE_PARAM_STRING((char*)result); + } + else + { + OPCODE_WRITE_PARAM_UINT(result); + } + } + + CLEO_SkipUnusedVarArgs(thread); + return OR_CONTINUE; + } + + //0A8C=4,write_memory %1d% size %2d% value %3d% virtual_protect %4d% + static OpcodeResult __stdcall opcode_0A8C(CLEO::CRunningScript* thread) + { + // collect params + auto address = OPCODE_READ_PARAM_PTR(); + auto size = OPCODE_READ_PARAM_INT(); + + // value param + const void* source; + auto paramType = thread->PeekDataType(); + bool sourceText = false; + if (IsVariable(paramType)) + { + source = CLEO_GetPointerToScriptVariable(thread); + } + else if (IsImmString(paramType) || IsVarString(paramType)) + { + static char buffer[MAX_STR_LEN]; + + if (size > MAX_STR_LEN) + { + SHOW_ERROR("Size argument (%d) 
greater than supported (%d) in script %s\nScript suspended.", size, MAX_STR_LEN, ScriptInfoStr(thread).c_str()); + return thread->Suspend(); + } + + ZeroMemory(buffer, sizeof(buffer)); // clear the whole buffer: pads with zeros if size > length, and stays in bounds when size is negative (rejected by the validation below) + source = CLEO_ReadStringOpcodeParam(thread, buffer, sizeof(buffer)); + sourceText = true; + } + else + { + static SCRIPT_VAR value; + + CLEO_RetrieveOpcodeParams(thread, 1); + value = CLEO_GetOpcodeParamsArray()[0]; + source = &value; + } + + auto virtualProtect = OPCODE_READ_PARAM_BOOL(); + + // validate params + if ((size_t)address <= MinValidAddress) + { + SHOW_ERROR("Invalid '0x%X' pointer param in script %s\nScript suspended.", address, ScriptInfoStr(thread).c_str()); + return thread->Suspend(); + } + + if (size < 0) + { + SHOW_ERROR("Invalid '%d' size argument in script %s\nScript suspended.", size, ScriptInfoStr(thread).c_str()); + return thread->Suspend(); + } + + // perform + if (size == 0) return OR_CONTINUE; // done + + if (virtualProtect) + { + DWORD oldProtect; + VirtualProtect(address, size, PAGE_EXECUTE_READWRITE, &oldProtect); + } + + if (!sourceText) + { + // that's how it worked since ever...
+ if (size == 2 || size == 4) + memcpy(address, source, size); + else + memset(address, *((int*)source), size); + } + else + { + memcpy(address, source, size); + } + + return OR_CONTINUE; + } + + //0A8D=4,read_memory %1d% size %2d% virtual_protect %3d% store_to %4d% + static OpcodeResult __stdcall opcode_0A8D(CLEO::CRunningScript* thread) + { + // collect params + auto address = OPCODE_READ_PARAM_PTR(); + auto size = OPCODE_READ_PARAM_INT(); + auto virtualProtect = OPCODE_READ_PARAM_BOOL(); + + // validate params + if ((size_t)address <= MinValidAddress) + { + SHOW_ERROR("Invalid '0x%X' pointer param of in script %s\nScript suspended.", address, ScriptInfoStr(thread).c_str()); + return thread->Suspend(); + } + + if (size < 0 || size > sizeof(SCRIPT_VAR)) + { + SHOW_ERROR("Invalid '%d' size argument in script %s\nScript suspended.", size, ScriptInfoStr(thread).c_str()); + return thread->Suspend(); + } + + // perform + DWORD value = 0; + if (size > 0) + { + if (virtualProtect) + { + DWORD oldProtect; + VirtualProtect(address, size, PAGE_EXECUTE_READWRITE, &oldProtect); + } + + memcpy(&value, address, size); + } + + OPCODE_WRITE_PARAM_UINT(value); + return OR_CONTINUE; + } + + //0A96=2,get_ped_pointer %1d% store_to %2d% + static OpcodeResult __stdcall opcode_0A96(CLEO::CRunningScript* thread) + { + // collect params + auto handle = OPCODE_READ_PARAM_UINT(); + + // validate params + auto index = handle >> 8; // lowest byte holds flags + if (index >= (DWORD)CPools::ms_pPedPool->m_nSize) + { + OPCODE_WRITE_PARAM_PTR(nullptr); + return OR_CONTINUE; + } + + auto ptr = CPools::GetPed(handle); + + OPCODE_WRITE_PARAM_PTR(ptr); + return OR_CONTINUE; + } + + //0A97=2,get_vehicle_pointer %1d% store_to %2d% + static OpcodeResult __stdcall opcode_0A97(CLEO::CRunningScript* thread) + { + // collect params + auto handle = OPCODE_READ_PARAM_UINT(); + + // validate params + auto index = handle >> 8; // lowest byte holds flags + if (index >= (DWORD)CPools::ms_pVehiclePool->m_nSize) + { 
+ OPCODE_WRITE_PARAM_PTR(nullptr); + return OR_CONTINUE; + } + + auto ptr = CPools::GetVehicle(handle); + + OPCODE_WRITE_PARAM_PTR(ptr); + return OR_CONTINUE; + } + + //0A98=2,get_object_pointer %1d% store_to %2d% + static OpcodeResult __stdcall opcode_0A98(CLEO::CRunningScript* thread) + { + // collect params + auto handle = OPCODE_READ_PARAM_UINT(); + + // validate params + auto index = handle >> 8; // lowest byte holds flags + if (index >= (DWORD)CPools::ms_pObjectPool->m_nSize) + { + OPCODE_WRITE_PARAM_PTR(nullptr); + return OR_CONTINUE; + } + + auto ptr = CPools::GetObject(handle); + + OPCODE_WRITE_PARAM_PTR(ptr); + return OR_CONTINUE; + } + + //0A9F=1, get_this_script_struct store_to %1d% + static OpcodeResult __stdcall opcode_0A9F(CLEO::CRunningScript* thread) + { + OPCODE_WRITE_PARAM_PTR(thread); + return OR_CONTINUE; + } + + //0AA2=2, load_dynamic_library %1s% store_to %2d% // IF and SET + static OpcodeResult __stdcall opcode_0AA2(CLEO::CRunningScript* thread) + { + auto str = OPCODE_READ_PARAM_FILEPATH(); + + auto ptr = LoadLibrary(str); + if (ptr != nullptr) + { + m_libraries.insert(ptr); + } + + OPCODE_WRITE_PARAM_PTR(ptr); + OPCODE_CONDITION_RESULT(ptr != nullptr); + return OR_CONTINUE; + } + + //0AA3=1,free_library %1h% + static OpcodeResult __stdcall opcode_0AA3(CLEO::CRunningScript* thread) + { + auto ptr = (HMODULE)OPCODE_READ_PARAM_PTR(); + + // validate + if (m_libraries.find(ptr) == m_libraries.end()) + { + LOG_WARNING(thread, "Invalid '0x%X' pointer param to unknown or already freed library in script %s", ptr, ScriptInfoStr(thread).c_str()); + return OR_CONTINUE; + } + + FreeLibrary(ptr); + m_libraries.erase(ptr); + return OR_CONTINUE; + } + + //0AA4=3, get_proc_address %1d% library %2d% result %3d% // IF and SET + static OpcodeResult __stdcall opcode_0AA4(CLEO::CRunningScript* thread) + { + auto name = OPCODE_READ_PARAM_STRING(); + auto ptr = (HMODULE)OPCODE_READ_PARAM_PTR(); + + // validate + if (m_libraries.find(ptr) == m_libraries.end()) + 
{ + LOG_WARNING(thread, "Invalid '0x%X' pointer param to unknown or freed library in script %s", ptr, ScriptInfoStr(thread).c_str()); + OPCODE_WRITE_PARAM_PTR(nullptr); + OPCODE_CONDITION_RESULT(false); + return OR_CONTINUE; + } + + auto funcPtr = (void*)GetProcAddress(ptr, name); + + OPCODE_WRITE_PARAM_PTR(funcPtr); + OPCODE_CONDITION_RESULT(funcPtr != nullptr); + return OR_CONTINUE; + } + + //0AA5=-1,call %1d% num_params %2h% pop %3h% + static OpcodeResult __stdcall opcode_0AA5(CLEO::CRunningScript* thread) + { + auto func = OPCODE_READ_PARAM_PTR(); + auto numArgs = OPCODE_READ_PARAM_INT(); + auto numPop = OPCODE_READ_PARAM_INT(); + + return CallFunctionGeneric(thread, func, nullptr, numArgs, numPop, false); + } + + //0AA6=-1,call_method %1d% struct %2d% num_params %3h% pop %4h% + static OpcodeResult __stdcall opcode_0AA6(CLEO::CRunningScript* thread) + { + auto func = OPCODE_READ_PARAM_PTR(); + auto obj = OPCODE_READ_PARAM_PTR(); + auto numArgs = OPCODE_READ_PARAM_INT(); + auto numPop = OPCODE_READ_PARAM_INT(); + + return CallFunctionGeneric(thread, func, obj, numArgs, numPop, false); + } + + //0AA7=-1,call_function_return %1d% num_params %2h% pop %3h% + static OpcodeResult __stdcall opcode_0AA7(CLEO::CRunningScript* thread) + { + auto func = OPCODE_READ_PARAM_PTR(); + auto numArgs = OPCODE_READ_PARAM_INT(); + auto numPop = OPCODE_READ_PARAM_INT(); + + return CallFunctionGeneric(thread, func, nullptr, numArgs, numPop, true); + } + + //0AA8=-1,call_method_return %1d% struct %2d% num_params %3h% pop %4h% + static OpcodeResult __stdcall opcode_0AA8(CLEO::CRunningScript* thread) + { + auto func = OPCODE_READ_PARAM_PTR(); + auto obj = OPCODE_READ_PARAM_PTR(); + auto numArgs = OPCODE_READ_PARAM_INT(); + auto numPop = OPCODE_READ_PARAM_INT(); + + return CallFunctionGeneric(thread, func, obj, numArgs, numPop, true); + } + + //0AAA=2, get_script_struct_named %1d% pointer %2d% // IF and SET + static OpcodeResult __stdcall opcode_0AAA(CLEO::CRunningScript *thread) + { + 
auto name = OPCODE_READ_PARAM_STRING(); + + auto ptr = CLEO_GetScriptByName(name, true, true, 0); + + OPCODE_WRITE_PARAM_PTR(ptr); + OPCODE_CONDITION_RESULT(ptr != nullptr); + return OR_CONTINUE; + } + + //0AC6=2,get_label_pointer %1d% store_to %2d% + static OpcodeResult __stdcall opcode_0AC6(CLEO::CRunningScript* thread) + { + auto label = OPCODE_READ_PARAM_INT(); + + // perform + void* ptr = nullptr; + if (label < 0) + ptr = thread->GetBasePointer() - label; + else + ptr = CTheScripts::ScriptSpace + label; + + OPCODE_WRITE_PARAM_PTR(ptr); + return OR_CONTINUE; + } + + //0AC7=2,get_var_pointer %1d% store_to %2d% + static OpcodeResult __stdcall opcode_0AC7(CLEO::CRunningScript* thread) + { + auto resultType = thread->PeekDataType(); + if (!IsVariable(resultType) && !IsVarString(resultType)) // only variables (plain or string) have storage whose address can be returned; constants are rejected + { + SHOW_ERROR("Input argument #%d expected to be variable, got constant in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), ScriptInfoStr(thread).c_str()); + return thread->Suspend(); + } + + auto ptr = CLEO_GetPointerToScriptVariable(thread); + + OPCODE_WRITE_PARAM_PTR(ptr); + return OR_CONTINUE; + } + + //0AC8=2, allocate_memory size %1d% store_to %2d% + static OpcodeResult WINAPI opcode_0AC8(CLEO::CRunningScript* thread) + { + // collect params + int size = OPCODE_READ_PARAM_INT(); + + // validate params + if (size <= 0) + { + SHOW_ERROR("Invalid '%d' size argument in script %s\nScript suspended.", size, ScriptInfoStr(thread).c_str()); + return thread->Suspend(); + } + + // perform + void* mem = calloc(size, 1); + if (mem) + { + DWORD oldProtect; + VirtualProtect(mem, size, PAGE_EXECUTE_READWRITE, &oldProtect); + + m_allocations.insert(mem); + } + else + LOG_WARNING(thread, "Failed to allocate %d bytes of memory in script %s", size, ScriptInfoStr(thread).c_str()); + + OPCODE_WRITE_PARAM_PTR(mem); + OPCODE_CONDITION_RESULT(mem != nullptr); + return OR_CONTINUE; + } + + //0AC9=1,free_memory %1d% + static OpcodeResult __stdcall opcode_0AC9(CLEO::CRunningScript*
thread) + { + // collect params + auto address = OPCODE_READ_PARAM_PTR(); + + // validate params + if (m_allocations.find(address) == m_allocations.end()) + { + LOG_WARNING(thread, "Invalid '0x%X' pointer param to unknown or already freed memory in script %s", address, ScriptInfoStr(thread).c_str()); + return OR_CONTINUE; + } + + free(address); + m_allocations.erase(address); + return OR_CONTINUE; // done + } + + //0AE9=1,pop_float store_to %1d% + static OpcodeResult __stdcall opcode_0AE9(CLEO::CRunningScript* thread) + { + float result; + _asm fstp result + + OPCODE_WRITE_PARAM_FLOAT(result); + return OR_CONTINUE; + } + + //0AEA=2,get_ped_ref %1d% store_to %2d% + static OpcodeResult __stdcall opcode_0AEA(CLEO::CRunningScript* thread) + { + // collect params + auto ptr = (CPed*)OPCODE_READ_PARAM_PTR(); + + int handle = -1; + if (!CPools::ms_pPedPool->IsObjectValid(ptr)) + { + OPCODE_WRITE_PARAM_INT(-1); // invalid handle + return OR_CONTINUE; + } + + handle = CPools::GetPedRef(ptr); + + OPCODE_WRITE_PARAM_INT(handle); + return OR_CONTINUE; + } + + //0AEB=2,get_vehicle_ref %1d% store_to %2d% + static OpcodeResult __stdcall opcode_0AEB(CLEO::CRunningScript* thread) + { + auto ptr = (CVehicle*)OPCODE_READ_PARAM_PTR(); + + int handle = -1; + if (!CPools::ms_pVehiclePool->IsObjectValid(ptr)) + { + OPCODE_WRITE_PARAM_INT(-1); // invalid handle + return OR_CONTINUE; + } + + handle = CPools::GetVehicleRef(ptr); + + OPCODE_WRITE_PARAM_INT(handle); + return OR_CONTINUE; + } + + //0AEC=2,get_object_ref %1d% store_to %2d% + static OpcodeResult __stdcall opcode_0AEC(CLEO::CRunningScript* thread) + { + auto ptr = (CObject*)OPCODE_READ_PARAM_PTR(); + + int handle = -1; + if (!CPools::ms_pObjectPool->IsObjectValid(ptr)) + { + OPCODE_WRITE_PARAM_INT(-1); // invalid handle + return OR_CONTINUE; + } + + handle = CPools::GetObjectRef(ptr); + + OPCODE_WRITE_PARAM_INT(handle); + return OR_CONTINUE; + } + + //2400=3,copy_memory %1d% to %2d% size %3d% + static OpcodeResult __stdcall 
opcode_2400(CLEO::CRunningScript* thread) + { + auto src = (BYTE*)OPCODE_READ_PARAM_PTR(); + auto trg = (BYTE*)OPCODE_READ_PARAM_PTR(); + auto size = OPCODE_READ_PARAM_INT(); + + if (size == 0) + { + return OR_CONTINUE; // done + } + if (size < 0) + { + SHOW_ERROR("Invalid '%d' size argument in script %s\nScript suspended.", size, ScriptInfoStr(thread).c_str()); + return thread->Suspend(); + } + + // memory blocks can not overlap + auto first = min(src, trg); + auto second = max(src, trg); + if ((first + size) > second) + { + SHOW_ERROR("Invalid overlapping memory blocks in script %s\nScript suspended.", ScriptInfoStr(thread).c_str()); + return thread->Suspend(); + } + + memcpy((void*)trg, (void*)src, size); + return OR_CONTINUE; + } + + //2401=4,read_memory_with_offset %1d% offset %2d% size %3d% store_to %4d% + static OpcodeResult __stdcall opcode_2401(CLEO::CRunningScript* thread) + { + auto ptr = (BYTE*)OPCODE_READ_PARAM_PTR(); + auto offset = OPCODE_READ_PARAM_INT(); + auto size = OPCODE_READ_PARAM_INT(); + + if (size < 0) + { + SHOW_ERROR("Invalid '%d' size argument in script %s\nScript suspended.", size, ScriptInfoStr(thread).c_str()); + return thread->Suspend(); + } + + auto resultType = thread->PeekDataType(); + if (IsVariable(resultType)) + { + if (size == 0) + { + OPCODE_WRITE_PARAM_INT(0); + return OR_CONTINUE; // done + } + + DWORD result = 0; + if (size > sizeof(result)) + { + LOG_WARNING(thread, "Size '%d' argument out of supported range (%d) in script %s", size, sizeof(result), ScriptInfoStr(thread).c_str()); + size = sizeof(result); + } + if (size > 0) memcpy(&result, (void*)(ptr + offset), size); + + OPCODE_WRITE_PARAM_INT(result); + return OR_CONTINUE; + } + else if (IsVarString(resultType)) + { + std::string str(std::string_view((char*)ptr + offset, size)); // null terminated + OPCODE_WRITE_PARAM_STRING(str.c_str()); + return OR_CONTINUE; + } + + SHOW_ERROR("Invalid type (%s) of the result argument in script %s \nScript suspended.", 
ToKindStr(resultType), ScriptInfoStr(thread).c_str()); + return thread->Suspend(); + } + + //2402=4,write_memory_with_offset %1d% offset %2d% size %3d% value %4d% + static OpcodeResult __stdcall opcode_2402(CLEO::CRunningScript* thread) + { + auto ptr = (BYTE*)OPCODE_READ_PARAM_PTR(); + auto offset = OPCODE_READ_PARAM_INT(); + auto size = OPCODE_READ_PARAM_INT(); + + if (size < 0) + { + SHOW_ERROR("Invalid '%d' size argument in script %s\nScript suspended.", size, ScriptInfoStr(thread).c_str()); + return thread->Suspend(); + } + + if (size == 0) + { + CLEO_SkipOpcodeParams(thread, 1); // value not used + return OR_CONTINUE; // done + } + + auto valueType = thread->PeekDataType(); + if (IsImmInteger(valueType) || IsImmFloat(valueType) || IsVariable(valueType)) + { + if (size > sizeof(DWORD)) + { + LOG_WARNING(thread, "Size '%d' argument out of supported range (%d) in script %s", size, sizeof(DWORD), ScriptInfoStr(thread).c_str()); + size = sizeof(DWORD); + } + + auto value = OPCODE_READ_PARAM_INT(); + memcpy(ptr + offset, &value, size); + + return OR_CONTINUE; + } + else if (IsImmString(valueType) || IsVarString(valueType)) + { + auto str = OPCODE_READ_PARAM_STRING(); + auto len = (int)strlen(str); + + memcpy(ptr + offset, str, min(size, len)); + if (size > len) ZeroMemory(ptr + offset + len, size - len); // fill rest with zeros + + return OR_CONTINUE; + } + + SHOW_ERROR("Invalid type (%s) of the input argument #%d in script %s \nScript suspended.", ToKindStr(valueType), CLEO_GetParamsHandledCount(), ScriptInfoStr(thread).c_str()); // argument order must match the (%s) and #%d placeholders + return thread->Suspend(); + } + + //2403=1,forget_memory %1d% + static OpcodeResult __stdcall opcode_2403(CLEO::CRunningScript* thread) + { + // collect params + auto address = OPCODE_READ_PARAM_PTR(); + + // validate params + if (m_allocations.find(address) == m_allocations.end()) + { + LOG_WARNING(thread, "Invalid '0x%X' pointer param to unknown or already freed memory in script %s", address, ScriptInfoStr(thread).c_str()); + return
OR_CONTINUE; + } + + m_allocations.erase(address); + return OR_CONTINUE; // done + } + + //2404=1,get_script_struct_just_created %1d% + static OpcodeResult __stdcall opcode_2404(CLEO::CScriptThread* thread) + { + auto head = thread; + while(head->Previous) + { + head = head->Previous; + } + + OPCODE_WRITE_PARAM_PTR(head); + return OR_CONTINUE; + } + + //2405=1, is_script_running %1d% + static OpcodeResult __stdcall opcode_2405(CLEO::CScriptThread* thread) + { + auto address = (CLEO::CScriptThread*)OPCODE_READ_PARAM_INT(); + + auto name = CLEO_GetScriptFilename(address); + + OPCODE_CONDITION_RESULT(name != nullptr); + return OR_CONTINUE; + } + + //2406=1, get_script_struct_from_filename %1s% + static OpcodeResult __stdcall opcode_2406(CLEO::CScriptThread* thread) + { + auto filename = OPCODE_READ_PARAM_STRING(); + + auto address = CLEO_GetScriptByFilename(filename); + + OPCODE_WRITE_PARAM_PTR(address); + OPCODE_CONDITION_RESULT(address != nullptr); + return OR_CONTINUE; + } +} Memory; + +std::set MemoryOperations::m_allocations; +std::set MemoryOperations::m_libraries; diff --git a/cleo_plugins/MemoryOperations/MemoryOperations.vcxproj b/cleo_plugins/MemoryOperations/MemoryOperations.vcxproj new file mode 100644 index 00000000..3a5b08dc --- /dev/null +++ b/cleo_plugins/MemoryOperations/MemoryOperations.vcxproj @@ -0,0 +1,144 @@ + + + + + Release + Win32 + + + Debug + Win32 + + + + {35C80F79-8B18-4925-8C32-94B320DBE76F} + true + Win32Proj + MemoryOperations + 10.0 + MemoryOperations + + + + DynamicLibrary + false + MultiByte + v143 + true + + + DynamicLibrary + true + MultiByte + v143 + + + + + + + + + + + + + $(SolutionDir).output\ + $(ProjectDir).obj\$(Configuration)\ + MemoryOperations + .cleo5 + + + $(SolutionDir).output\ + $(ProjectDir).obj\$(Configuration)\ + MemoryOperations + .cleo5 + + + $(GTA_SA_DIR)\gta_sa.exe + $(GTA_SA_DIR) + false + WindowsLocalDebugger + + + + Level3 + MaxSpeed + true + true + true + MultiThreaded + 
_NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions);TARGET_NAME=R"($(TargetName))" + /Zc:threadSafeInit- %(AdditionalOptions) + $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared;$(PLUGIN_SDK_DIR)\shared\game;$(SolutionDir)..\cleo_sdk;%(AdditionalIncludeDirectories) + stdcpp17 + + + true + true + true + UseLinkTimeCodeGeneration + $(PLUGIN_SDK_DIR)\output\lib\;$(SolutionDir)..\cleo_sdk;%(AdditionalLibraryDirectories) + cleo.lib;%(AdditionalDependencies) + Windows + + + taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" +xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" + + + + + Level3 + Disabled + true + MultiThreadedDebug + _DEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions);TARGET_NAME=R"($(TargetName))" + /Zc:threadSafeInit- %(AdditionalOptions) + $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared;$(PLUGIN_SDK_DIR)\shared\game;$(SolutionDir)..\cleo_sdk;%(AdditionalIncludeDirectories) + stdcpp17 + + + true + Default + $(PLUGIN_SDK_DIR)\output\lib\;$(SolutionDir)..\cleo_sdk;%(AdditionalLibraryDirectories) + cleo.lib;%(AdditionalDependencies) + Windows + + + taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" +xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" + + + + + NotUsing + + + NotUsing + + + NotUsing + + + NotUsing + + + NotUsing + + + NotUsing + + + NotUsing + + + + + + + + + + + \ No newline at end of file diff --git a/cleo_plugins/MemoryOperations/MemoryOperations.vcxproj.filters b/cleo_plugins/MemoryOperations/MemoryOperations.vcxproj.filters new file mode 100644 
index 00000000..d26f1298 --- /dev/null +++ b/cleo_plugins/MemoryOperations/MemoryOperations.vcxproj.filters @@ -0,0 +1,43 @@ + + + + + + plugin_sdk + + + plugin_sdk + + + plugin_sdk + + + plugin_sdk + + + plugin_sdk + + + plugin_sdk + + + plugin_sdk + + + + + {1903661c-d3a7-4f51-8910-54b32282a46d} + + + {0c8900ae-85e5-4dc1-9d7b-173b6f8cd435} + + + + + cleo_sdk + + + cleo_sdk + + + \ No newline at end of file diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index 7344f768..2c10e866 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -68,8 +68,20 @@ enum eDataType : BYTE DT_VAR_STRING, // globalVarVString v$ DT_LVAR_STRING, // localVarVString @v DT_VAR_STRING_ARRAY, // globalVarStringArr v$(,) - DT_LVAR_STRING_ARRAY // localVarStringArr @v(,) + DT_LVAR_STRING_ARRAY, // localVarStringArr @v(,) + DT_INVALID = 0xFF // CLEO internal }; + +enum eArrayDataType : BYTE +{ + ADT_INT, // variable with integer + ADT_FLOAT, // variable with float + ADT_TEXTLABEL, // variable with short string (8 char) + ADT_STRING, // variable with long string (16 char) + ADT_NONE = 0xFF // CLEO internal +}; +static const BYTE ArrayDataTypeMask = ADT_INT | ADT_FLOAT | ADT_TEXTLABEL | ADT_STRING; // array flags byte contains other info too.
Type needs to be masked when read + static const char* ToStr(eDataType type) { switch (type) @@ -151,7 +163,21 @@ static bool IsVariable(eDataType type) // can carry int, float, pointer to text } return false; } -static const char* ToKindStr(eDataType type) +static bool IsArray(eDataType type) +{ + switch (type) + { + case DT_LVAR_TEXTLABEL_ARRAY: + case DT_LVAR_STRING_ARRAY: + case DT_VAR_TEXTLABEL_ARRAY: + case DT_VAR_STRING_ARRAY: + case DT_VAR_ARRAY: + case DT_LVAR_ARRAY: + return true; + } + return false; +} +static const char* ToKindStr(eDataType type, eArrayDataType arrType = ADT_NONE) { switch (type) { @@ -177,11 +203,26 @@ static const char* ToKindStr(eDataType type) return "string"; break; case DT_VAR: - case DT_VAR_ARRAY: case DT_LVAR: - case DT_LVAR_ARRAY: return "variable"; break; + case DT_VAR_ARRAY: + case DT_LVAR_ARRAY: + switch(arrType) + { + case ADT_INT: + return "int"; break; + + case ADT_FLOAT: + return "float"; break; + + case ADT_TEXTLABEL: + case ADT_STRING: + return "string"; break; + + default: return "variable"; + } + case DT_END: return "varArgEnd"; break; @@ -287,7 +328,7 @@ typedef SCRIPT_HANDLE HSTREAM; #ifdef __cplusplus class CRunningScript { -protected: +public: #else struct CRunningScript { @@ -377,6 +418,8 @@ struct CRunningScript void SetNotFlag(bool state) { NotFlag = state; } eDataType PeekDataType() const { return *(eDataType*)CurrentIP; } + eArrayDataType PeekArrayDataType() const { BYTE t = *(CurrentIP + 1 + 2 + 2 + 1); t &= ArrayDataTypeMask; return (eArrayDataType) t; } // result valid only for array type params + eDataType ReadDataType() { return (eDataType)ReadDataByte(); } short ReadDataVarIndex() { return ReadDataWord(); } short ReadDataArrayOffset() { return ReadDataWord(); } @@ -424,6 +467,10 @@ void WINAPI CLEO_RegisterCallback(eCallbackId id, void* func); // script utils void WINAPI CLEO_GetScriptInfoStr(CRunningScript* thread, bool currLineInfo, char* buf, DWORD bufSize); // short text for displaying in error\log 
messages eCLEO_Version WINAPI CLEO_GetScriptVersion(const CRunningScript* thread); // compatibility mode +LPCSTR WINAPI CLEO_GetScriptFilename(const CRunningScript* thread); // returns nullptr if provided script ptr is not valid + +LPCSTR WINAPI CLEO_GetScriptWorkDir(const CRunningScript* thread); +void WINAPI CLEO_SetScriptWorkDir(CRunningScript* thread, const char* path); void WINAPI CLEO_SetThreadCondResult(CRunningScript* thread, BOOL result); void WINAPI CLEO_ThreadJumpAtLabelPtr(CRunningScript* thread, int labelPtr); @@ -434,15 +481,21 @@ DWORD WINAPI CLEO_GetVarArgCount(CRunningScript* thread); // peek remaining var- extern SCRIPT_VAR* opcodeParams; extern SCRIPT_VAR* missionLocals; +SCRIPT_VAR* WINAPI CLEO_GetOpcodeParamsArray(); // get pointer to 'SCRIPT_VAR[32] opcodeParams'. Used by Retrieve/Record opcode params functions +BYTE WINAPI CLEO_GetParamsHandledCount(); // number of already read/written opcode parameters since current opcode handler was called + // param read SCRIPT_VAR* WINAPI CLEO_GetPointerToScriptVariable(CRunningScript* thread); // get pointer to the variable data. Advances script to next param void WINAPI CLEO_RetrieveOpcodeParams(CRunningScript* thread, int count); // read multiple params. Stored in opcodeParams array DWORD WINAPI CLEO_GetIntOpcodeParam(CRunningScript* thread); float WINAPI CLEO_GetFloatOpcodeParam(CRunningScript* thread); -LPSTR WINAPI CLEO_ReadStringOpcodeParam(CRunningScript* thread, char* buf = nullptr, int bufSize = 0); +LPSTR WINAPI CLEO_ReadStringOpcodeParam(CRunningScript* thread, char* buf = nullptr, int bufSize = 0); // returns nullptr on fail LPSTR WINAPI CLEO_ReadStringPointerOpcodeParam(CRunningScript* thread, char* buf = nullptr, int bufSize = 0); // exactly same as CLEO_ReadStringOpcodeParam void WINAPI CLEO_ReadStringParamWriteBuffer(CRunningScript* thread, char** outBuf, int* outBufSize, DWORD* outNeedsTerminator); // get info about the string opcode param, so it can be written latter. 
If outNeedsTerminator is not 0 then whole bufSize can be used as text characters. Advances script to next param char* WINAPI CLEO_ReadParamsFormatted(CRunningScript* thread, const char* format, char* buf = nullptr, int bufSize = 0); // consumes all var-arg params and terminator +// get param value without advancing the script +DWORD WINAPI CLEO_PeekIntOpcodeParam(CRunningScript* thread); +float WINAPI CLEO_PeekFloatOpcodeParam(CRunningScript* thread); // param skip without reading void WINAPI CLEO_SkipOpcodeParams(CRunningScript* thread, int count); @@ -460,6 +513,8 @@ void WINAPI CLEO_SetScriptDebugMode(CRunningScript* thread, BOOL enabled); CRunningScript* WINAPI CLEO_CreateCustomScript(CRunningScript* fromThread, const char* script_name, int label); CRunningScript* WINAPI CLEO_GetLastCreatedCustomScript(); +CRunningScript* WINAPI CLEO_GetScriptByName(const char* threadName, BOOL standardScripts, BOOL customScripts, DWORD resultIndex = 0); // can be called multiple times to find more scripts named threadName. resultIndex should be incremented until the method returns nullptr +CRunningScript* WINAPI CLEO_GetScriptByFilename(const char* path, DWORD resultIndex = 0); // can be absolute, partial path or just filename // scripts deletion callback void WINAPI CLEO_AddScriptDeleteDelegate(FuncScriptDeleteDelegateT func); diff --git a/cleo_sdk/CLEO_Utils.h b/cleo_sdk/CLEO_Utils.h new file mode 100644 index 00000000..81123e1d --- /dev/null +++ b/cleo_sdk/CLEO_Utils.h @@ -0,0 +1,471 @@ +// some utilities usefull when creating CLEO plugins +// requires adding "CPools.cpp" from GTA Plugin SDK to the project + +#pragma once +#include "CLEO.h" +#include "CFileMgr.h" // from GTA Plugin SDK +#include "CPools.h" // from GTA Plugin SDK +#include "shellapi.h" // game window minimize/maximize support +#include +#include + +namespace CLEO +{ + /* + TRACE(format,...) // log to file. Can be displayed on screen with change in DebugUtils.ini + LOG_WARNING(script, format, ...) 
// warning text on screen and in log file. Not displayed for scripts in 'legacy' mode + SHOW_ERROR(a,...) // message box, log to file + + Macros to use inside opcode handler functions. Include types validation, printing warnings and suspending script on critical errors. + Please mind those might expand into multiple lines, so should not, for example, be used as body of 'if' statements without brackets! + + OPCODE_CONDITION_RESULT(value) // set result + OPCODE_SKIP_PARAMS(count) // ignore X params + + // reading opcode input arguments + OPCODE_READ_PARAM_BOOL() + OPCODE_READ_PARAM_INT8() + OPCODE_READ_PARAM_UINT8() + OPCODE_READ_PARAM_INT16() + OPCODE_READ_PARAM_UINT16() + OPCODE_READ_PARAM_INT() + OPCODE_READ_PARAM_UINT() + OPCODE_READ_PARAM_FLOAT() + OPCODE_READ_PARAM_STRING() // returns char* to internal buffer. It might be overwritten by another string read! + OPCODE_READ_PARAM_STRING_BUFF(_buffer, _bufferSize) + OPCODE_READ_PARAM_FILEPATH() // returns char* to internal buffer. It might be overwritten by another string read! 
+ OPCODE_READ_PARAM_PTR() // read and validate memory address argument + OPCODE_READ_PARAM_OBJECT_HANDLE() + OPCODE_READ_PARAM_PED_HANDLE() + OPCODE_READ_PARAM_VEHICLE_HANDLE() + OPCODE_READ_PARAM_OUTPUT_VAR() // pointer to write result later + OPCODE_READ_PARAM_OUTPUT_VAR_INT() // pointer to write integer result later + OPCODE_READ_PARAM_OUTPUT_VAR_FLOAT() // pointer to write float result later + + // writing opcode output/result data + OPCODE_WRITE_PARAM_BOOL(value) + OPCODE_WRITE_PARAM_INT8(value) + OPCODE_WRITE_PARAM_UINT8(value) + OPCODE_WRITE_PARAM_INT16(value) + OPCODE_WRITE_PARAM_UINT16(value) + OPCODE_WRITE_PARAM_INT(value) + OPCODE_WRITE_PARAM_UINT(value) + OPCODE_WRITE_PARAM_FLOAT(value) + OPCODE_WRITE_PARAM_STRING(value) + OPCODE_WRITE_PARAM_PTR(value) // memory address + */ + + // this plugin's config file + static std::string GetConfigFilename() + { + std::string configFile = CFileMgr::ms_rootDirName; + if (!configFile.empty() && configFile.back() != '\\') configFile.push_back('\\'); + + configFile += "cleo\\cleo_plugins\\" TARGET_NAME ".ini"; + + return configFile; + } + + static std::string StringPrintf(const char* format, ...) 
+ { + va_list args; + + // first pass: measure the formatted text (terminator not counted) + va_start(args, format); + const int len = std::vsnprintf(nullptr, 0, format, args); + va_end(args); + + if (len <= 0) return {}; // formatting error or nothing to print + + std::string result(len, '\0'); + + // second pass: the terminator is written into the string's own null slot, so it does not become part of the returned text + va_start(args, format); + std::vsnprintf(result.data(), result.size() + 1, format, args); + va_end(args); + + return result; + } + + // returns description of the script obtained from CLEO_GetScriptInfoStr + static std::string ScriptInfoStr(CLEO::CRunningScript* thread) + { + std::string info(1024, '\0'); + CLEO_GetScriptInfoStr(thread, true, info.data(), info.length()); + info.resize(strlen(info.c_str())); // drop unused zero padding + return info; // plain return, std::move here would only prevent copy elision + } + + static bool IsObjectHandleValid(DWORD handle) + { + // get handle info + auto flags = handle & 0xFF; + auto index = handle >> 8; + + if (index >= (DWORD)CPools::ms_pObjectPool->m_nSize) + return false; // index out of bounds + + if (CPools::ms_pObjectPool->m_byteMap[index].IntValue() != flags) + return false; // flags mismatch + + return true; + } + + static bool IsPedHandleValid(DWORD handle) + { + // get handle info + auto flags = handle & 0xFF; + auto index = handle >> 8; + + if (index >= (DWORD)CPools::ms_pPedPool->m_nSize) + return false; // index out of bounds + + if (CPools::ms_pPedPool->m_byteMap[index].IntValue() != flags) + return false; // flags mismatch + + return true; + } + + static bool IsVehicleHandleValid(DWORD handle) + { + // get handle info + auto flags = handle & 0xFF; + auto index = handle >> 8; + + if (index >= (DWORD)CPools::ms_pVehiclePool->m_nSize) + return false; // index out of bounds + + if (CPools::ms_pVehiclePool->m_byteMap[index].IntValue() != flags) + return false; // flags mismatch + + return true; + } + + // formats the message into a shared static buffer, logs it and returns pointer to that buffer (overwritten by the next call) + static const char* TraceVArg(CLEO::eLogLevel level, const char* format, va_list args) + { + static char szBuf[1024]; + vsnprintf(szBuf, sizeof(szBuf), format, args); // put params into format, truncating instead of overrunning the buffer + CLEO_Log(level, szBuf); + return szBuf; + } + + static void Trace(CLEO::eLogLevel level, const char* format, ...) 
+ { + va_list args; + va_start(args, format); + TraceVArg(level, format, args); + va_end(args); + } + + // logs formatted message, skipped for scripts whose version is below CLEO_VER_5 + static void Trace(const CLEO::CRunningScript* thread, CLEO::eLogLevel level, const char* format, ...) + { + if (thread != nullptr && CLEO_GetScriptVersion(thread) < CLEO::eCLEO_Version::CLEO_VER_5) + { + return; // do not log this in older versions + } + + va_list args; + va_start(args, format); + TraceVArg(level, format, args); + va_end(args); + } + + // logs formatted message with Error level and displays it in a modal message box + static void ShowError(const char* format, ...) + { + va_list args; + va_start(args, format); + auto msg = TraceVArg(CLEO::eLogLevel::Error, format, args); + va_end(args); + + QUERY_USER_NOTIFICATION_STATE pquns = QUNS_ACCEPTS_NOTIFICATIONS; // defined fallback in case the query below fails + const bool stateKnown = SUCCEEDED(SHQueryUserNotificationState(&pquns)); + bool fullscreen = stateKnown && ((pquns == QUNS_BUSY) || (pquns == QUNS_RUNNING_D3D_FULL_SCREEN) || (pquns == QUNS_PRESENTATION_MODE)); + + if (fullscreen) + { + PostMessage(NULL, WM_SYSCOMMAND, SC_MINIMIZE, 0); + ShowWindow(NULL, SW_MINIMIZE); + } + + MessageBox(NULL, msg, "CLEO error", MB_SYSTEMMODAL | MB_TOPMOST | MB_ICONERROR | MB_OK); + + if (fullscreen) + { + PostMessage(NULL, WM_SYSCOMMAND, SC_RESTORE, 0); + ShowWindow(NULL, SW_RESTORE); + } + } + + #define TRACE(format,...) {CLEO::Trace(CLEO::eLogLevel::Default, format, __VA_ARGS__);} + #define LOG_WARNING(script, format, ...) {CLEO::Trace(script, CLEO::eLogLevel::Error, format, __VA_ARGS__);} + #define SHOW_ERROR(a,...) {CLEO::ShowError(a, __VA_ARGS__);} + + const size_t MinValidAddress = 0x10000; // used for validation of pointers received from scripts. First 64kb are for sure reserved by Windows. 
+ // argument is parenthesized so expressions like 'base + offset' are cast and compared as a whole + #define OPCODE_VALIDATE_POINTER(x) if((size_t)(x) <= MinValidAddress) { SHOW_ERROR("Invalid '0x%X' pointer argument in script %s \nScript suspended.", (x), ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + + #define OPCODE_CONDITION_RESULT(value) CLEO_SetThreadCondResult(thread, value); + + // opcode param handling utils internal + static SCRIPT_VAR* _paramsArray = nullptr; + static eDataType _lastParamType = eDataType::DT_END; + static eArrayDataType _lastParamArrayType = eArrayDataType::ADT_NONE; + + // remembers type of the next param, then reads it into first element of the opcode params array + static SCRIPT_VAR& _readParam(CRunningScript* thread) + { + _lastParamType = thread->PeekDataType(); + _lastParamArrayType = IsArray(_lastParamType) ? thread->PeekArrayDataType() : eArrayDataType::ADT_NONE; + + CLEO_RetrieveOpcodeParams(thread, 1); + if (_paramsArray == nullptr) _paramsArray = CLEO_GetOpcodeParamsArray(); + return _paramsArray[0]; + } + + // remembers type of the next param, then returns pointer to the script variable it refers to + static SCRIPT_VAR* _readParamVariable(CRunningScript* thread) + { + _lastParamType = thread->PeekDataType(); + _lastParamArrayType = IsArray(_lastParamType) ? thread->PeekArrayDataType() : eArrayDataType::ADT_NONE; + + return CLEO_GetPointerToScriptVariable(thread); + } + + // remembers type of the next param, then records pointer value into it + static void _writeParamPtr(CRunningScript* thread, void* valuePtr) + { + _lastParamType = thread->PeekDataType(); + _lastParamArrayType = IsArray(_lastParamType) ? thread->PeekArrayDataType() : eArrayDataType::ADT_NONE; + + if (_paramsArray == nullptr) _paramsArray = CLEO_GetOpcodeParamsArray(); + _paramsArray[0].pParam = valuePtr; + CLEO_RecordOpcodeParams(thread, 1); + } + + template <typename T> static void _writeParam(CRunningScript* thread, T value) + { + _lastParamType = thread->PeekDataType(); + _lastParamArrayType = IsArray(_lastParamType) ? 
thread->PeekArrayDataType() : eArrayDataType::ADT_NONE; + + if (_paramsArray == nullptr) _paramsArray = CLEO_GetOpcodeParamsArray(); + _paramsArray[0].dwParam = 0; + memcpy(&_paramsArray[0], &value, sizeof(T)); + CLEO_RecordOpcodeParams(thread, 1); + } + + static inline bool _paramWasInt(bool output = false) + { + if (_lastParamArrayType != eArrayDataType::ADT_NONE) return _lastParamArrayType == eArrayDataType::ADT_INT; + if (IsVariable(_lastParamType)) return true; + if (!output && IsImmInteger(_lastParamType)) return true; + return false; + } + + static inline bool _paramWasFloat(bool output = false) + { + if (_lastParamArrayType != eArrayDataType::ADT_NONE) return _lastParamArrayType == eArrayDataType::ADT_FLOAT; + if (IsVariable(_lastParamType)) return true; + if (!output && IsImmFloat(_lastParamType)) return true; + return false; + } + + static inline bool _paramWasString(bool output = false) + { + if (_lastParamArrayType != eArrayDataType::ADT_NONE) + { + return _lastParamArrayType == eArrayDataType::ADT_STRING || + _lastParamArrayType == eArrayDataType::ADT_TEXTLABEL || + _lastParamArrayType == eArrayDataType::ADT_INT; // pointer to output buffer + } + + if (IsVarString(_lastParamType)) return true; + if (!output && IsImmString(_lastParamType)) return true; + + // pointer to output buffer + if (IsVariable(_lastParamType)) return true; + + return false; + } + + static inline bool _paramWasVariable() + { + return IsVariable(_lastParamType); + } + + static char* _readParamText(CRunningScript* thread, char* buffer = nullptr, DWORD bufferSize = 0) + { + _lastParamType = thread->PeekDataType(); + _lastParamArrayType = IsArray(_lastParamType) ? 
thread->PeekArrayDataType() : eArrayDataType::ADT_NONE; + + if (!_paramWasString()) + { + SHOW_ERROR("Input argument #%d expected to be string, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), ToKindStr(_lastParamType, _lastParamArrayType), ScriptInfoStr(thread).c_str()); + thread->Suspend(); + _lastParamType = DT_INVALID; // mark error + return nullptr; + } + + auto str = CLEO_ReadStringOpcodeParam(thread, buffer, bufferSize); + if (str == nullptr) // other error? + { + SHOW_ERROR("Invalid input argument #%d in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), ScriptInfoStr(thread).c_str()); + thread->Suspend(); + _lastParamType = DT_INVALID; // mark error + return nullptr; + } + + return str; + } + + static char* _readParamFilepath(CRunningScript* thread) + { + auto str = _readParamText(thread); + if (str == nullptr) return nullptr; + + CLEO_ResolvePath(thread, str, MAX_STR_LEN); // uses generic readStringParam's buffer + return str; + } + + static bool _writeParamText(CRunningScript* thread, const char* str) + { + _lastParamType = thread->PeekDataType(); + _lastParamArrayType = IsArray(_lastParamType) ? 
thread->PeekArrayDataType() : eArrayDataType::ADT_NONE; + + if (str != nullptr && (size_t)str <= MinValidAddress) + { + SHOW_ERROR("Invalid '0x%X' source pointer of output string argument #%d in script %s \nScript suspended.", str, CLEO_GetParamsHandledCount(), ScriptInfoStr(thread).c_str()); + thread->Suspend(); + return false; + } + + if (!_paramWasString(true)) + { + SHOW_ERROR("Output argument #%d expected to be variable string, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), ToKindStr(_lastParamType, _lastParamArrayType), ScriptInfoStr(thread).c_str()); + thread->Suspend(); + return false; + } + + if (IsVariable(_lastParamType)) // pointer to buffer + { + auto ptr = CLEO_PeekIntOpcodeParam(thread); + + if ((size_t)ptr <= MinValidAddress) + { + SHOW_ERROR("Invalid '0x%X' pointer of output string argument #%d in script %s \nScript suspended.", ptr, CLEO_GetParamsHandledCount(), ScriptInfoStr(thread).c_str()); + thread->Suspend(); + return false; + } + } + + char* buff = nullptr; + int size = 0; + DWORD needTerminator = false; + CLEO_ReadStringParamWriteBuffer(thread, &buff, &size, &needTerminator); + + if (buff == nullptr) // all error types already handled, but check just in case + { + SHOW_ERROR("Invalid output argument #%d in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), ScriptInfoStr(thread).c_str()); + thread->Suspend(); + return false; + } + + if (size == 0) + { + return true; // done + } + + bool addTerminator = needTerminator; + size_t buffLen = size - addTerminator; + size_t length = str == nullptr ? 0 : strlen(str); + + if (buffLen > length) addTerminator = true; // there is space left for terminator + + length = min(length, buffLen); + if (length > 0) std::memcpy(buff, str, length); + if (addTerminator) buff[length] = '\0'; + return true; // done + } + + #define OPCODE_SKIP_PARAMS(_count) CLEO_SkipOpcodeParams(thread, _count) + + // macros for reading opcode input params. 
Performs type validation, throws error and suspends script if user provided invalid argument type + // TOD: add range checks for limited size types? + + #define OPCODE_READ_PARAM_BOOL() _readParam(thread).bParam; \ + if (!_paramWasInt()) { SHOW_ERROR("Input argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + + #define OPCODE_READ_PARAM_INT8() _readParam(thread).cParam; \ + if (!_paramWasInt()) { SHOW_ERROR("Input argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + + #define OPCODE_READ_PARAM_UINT8() _readParam(thread).ucParam; \ + if (!_paramWasInt()) { SHOW_ERROR("Input argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + + #define OPCODE_READ_PARAM_INT16() _readParam(thread).wParam; \ + if (!_paramWasInt()) { SHOW_ERROR("Input argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + + #define OPCODE_READ_PARAM_UINT16() _readParam(thread).usParam; \ + if (!_paramWasInt()) { SHOW_ERROR("Input argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + + #define OPCODE_READ_PARAM_INT() _readParam(thread).nParam; \ + if (!_paramWasInt()) { SHOW_ERROR("Input argument #%d expected to be integer, got %s in 
script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + + #define OPCODE_READ_PARAM_UINT() _readParam(thread).dwParam; \ + if (!_paramWasInt()) { SHOW_ERROR("Input argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + + #define OPCODE_READ_PARAM_FLOAT() _readParam(thread).fParam; \ + if (!_paramWasFloat()) { SHOW_ERROR("Input argument #%d expected to be float, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + + #define OPCODE_READ_PARAM_STRING() _readParamText(thread); if(!_paramWasString()) { return OpcodeResult::OR_INTERRUPT; } + + #define OPCODE_READ_PARAM_STRING_BUFF(_buffer, _bufferSize) _readParamText(thread, _buffer, _bufferSize); if(!_paramWasString()) { return OpcodeResult::OR_INTERRUPT; } + + #define OPCODE_READ_PARAM_FILEPATH() _readParamFilepath(thread); if(!_paramWasString()) { return OpcodeResult::OR_INTERRUPT; } + + #define OPCODE_READ_PARAM_PTR() _readParam(thread).pParam; \ + if (!_paramWasInt()) { SHOW_ERROR("Input argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ + else if (_paramsArray[0].dwParam <= MinValidAddress) { SHOW_ERROR("Invalid pointer '0x%X' input argument #%d in script %s \nScript suspended.", _paramsArray[0].dwParam, CLEO_GetParamsHandledCount(), ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + + #define OPCODE_READ_PARAM_OBJECT_HANDLE() _readParam(thread).dwParam; \ + if (!_paramWasInt()) { 
SHOW_ERROR("Input argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ + else if (!IsObjectHandleValid(_paramsArray[0].dwParam)) { SHOW_ERROR("Invalid object handle '0x%X' input argument #%d in script %s \nScript suspended.", _paramsArray[0].dwParam, CLEO_GetParamsHandledCount(), ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + + // reads ped handle argument; reports error and suspends the script when the value is not an integer or fails the IsPedHandleValid check + #define OPCODE_READ_PARAM_PED_HANDLE() _readParam(thread).dwParam; \ + if (!_paramWasInt()) { SHOW_ERROR("Input argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ + else if (!IsPedHandleValid(_paramsArray[0].dwParam)) { SHOW_ERROR("Invalid character handle '0x%X' input argument #%d in script %s \nScript suspended.", _paramsArray[0].dwParam, CLEO_GetParamsHandledCount(), ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + + // reads vehicle handle argument; reports error and suspends the script when the value is not an integer or fails the IsVehicleHandleValid check + #define OPCODE_READ_PARAM_VEHICLE_HANDLE() _readParam(thread).dwParam; \ + if (!_paramWasInt()) { SHOW_ERROR("Input argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ + else if (!IsVehicleHandleValid(_paramsArray[0].dwParam)) { SHOW_ERROR("Invalid vehicle handle '0x%X' input argument #%d in script %s \nScript suspended.", _paramsArray[0].dwParam, CLEO_GetParamsHandledCount(), ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + + #define OPCODE_READ_PARAM_OUTPUT_VAR() _readParamVariable(thread); \ + if (!_paramWasVariable()) { SHOW_ERROR("Output argument #%d expected to be variable, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), 
CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + + #define OPCODE_READ_PARAM_OUTPUT_VAR_INT() _readParamVariable(thread); \ + if (!_paramWasVariable()) { SHOW_ERROR("Output argument #%d expected to be variable, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ + if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + + #define OPCODE_READ_PARAM_OUTPUT_VAR_FLOAT() _readParamVariable(thread); \ + if (!_paramWasVariable()) { SHOW_ERROR("Output argument #%d expected to be variable, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ + if (!_paramWasFloat(true)) { SHOW_ERROR("Output argument #%d expected to be float, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + + // macros for writing opcode output params. 
Performs type validation, throws error and suspends script if user provided invalid argument type + + #define OPCODE_WRITE_PARAM_BOOL(value) _writeParam(thread, value); \ + if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + + #define OPCODE_WRITE_PARAM_INT8(value) _writeParam(thread, value); \ + if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + + #define OPCODE_WRITE_PARAM_UINT8(value) _writeParam(thread, value); \ + if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + + #define OPCODE_WRITE_PARAM_INT16(value) _writeParam(thread, value); \ + if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + + #define OPCODE_WRITE_PARAM_UINT16(value) _writeParam(thread, value); \ + if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + + #define OPCODE_WRITE_PARAM_INT(value) _writeParam(thread, value); \ + if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be 
integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + + #define OPCODE_WRITE_PARAM_UINT(value) _writeParam(thread, value); \ + if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + + #define OPCODE_WRITE_PARAM_FLOAT(value) _writeParam(thread, value); \ + if (!_paramWasFloat(true)) { SHOW_ERROR("Output argument #%d expected to be float, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + + #define OPCODE_WRITE_PARAM_STRING(value) if(!_writeParamText(thread, value)) { return OpcodeResult::OR_INTERRUPT; } + + #define OPCODE_WRITE_PARAM_PTR(value) _writeParamPtr(thread, value); \ + if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } +} diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 2f4af817..441bbdc6 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -22,8 +22,6 @@ namespace CLEO template inline CRunningScript& operator<<(CRunningScript& thread, memory_pointer pval); template inline CRunningScript& operator>>(CRunningScript& thread, memory_pointer& pval); - OpcodeResult __stdcall opcode_0A8C(CRunningScript *thread); - OpcodeResult __stdcall opcode_0A8D(CRunningScript *thread); OpcodeResult __stdcall opcode_0A8E(CRunningScript *thread); OpcodeResult __stdcall 
opcode_0A8F(CRunningScript *thread); OpcodeResult __stdcall opcode_0A90(CRunningScript *thread); @@ -32,22 +30,9 @@ namespace CLEO OpcodeResult __stdcall opcode_0A93(CRunningScript *thread); OpcodeResult __stdcall opcode_0A94(CRunningScript *thread); OpcodeResult __stdcall opcode_0A95(CRunningScript *thread); - OpcodeResult __stdcall opcode_0A96(CRunningScript *thread); - OpcodeResult __stdcall opcode_0A97(CRunningScript *thread); - OpcodeResult __stdcall opcode_0A98(CRunningScript *thread); - OpcodeResult __stdcall opcode_0A99(CRunningScript *thread); - OpcodeResult __stdcall opcode_0A9F(CRunningScript *thread); OpcodeResult __stdcall opcode_0AA0(CRunningScript *thread); OpcodeResult __stdcall opcode_0AA1(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AA2(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AA3(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AA4(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AA5(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AA6(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AA7(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AA8(CRunningScript *thread); OpcodeResult __stdcall opcode_0AA9(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AAA(CRunningScript *thread); OpcodeResult __stdcall opcode_0AAC(CRunningScript *thread); OpcodeResult __stdcall opcode_0AAD(CRunningScript *thread); OpcodeResult __stdcall opcode_0AAE(CRunningScript *thread); @@ -74,10 +59,6 @@ namespace CLEO OpcodeResult __stdcall opcode_0AC3(CRunningScript *thread); OpcodeResult __stdcall opcode_0AC4(CRunningScript *thread); OpcodeResult __stdcall opcode_0AC5(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AC6(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AC7(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AC8(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AC9(CRunningScript *thread); OpcodeResult __stdcall opcode_0ACA(CRunningScript *thread); 
OpcodeResult __stdcall opcode_0ACB(CRunningScript *thread); OpcodeResult __stdcall opcode_0ACC(CRunningScript *thread); @@ -98,19 +79,14 @@ namespace CLEO OpcodeResult __stdcall opcode_0AE1(CRunningScript *thread); OpcodeResult __stdcall opcode_0AE2(CRunningScript *thread); OpcodeResult __stdcall opcode_0AE3(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AE9(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AEA(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AEB(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AEC(CRunningScript *thread); OpcodeResult __stdcall opcode_0AED(CRunningScript *thread); OpcodeResult __stdcall opcode_0AEE(CRunningScript *thread); OpcodeResult __stdcall opcode_0AEF(CRunningScript *thread); OpcodeResult __stdcall opcode_0DD5(CRunningScript* thread); // get_platform - OpcodeResult __stdcall opcode_2000(CRunningScript* thread); // resolve_filepath - OpcodeResult __stdcall opcode_2001(CRunningScript* thread); // get_script_filename + // 2000 free slot + // 2001 free slot OpcodeResult __stdcall opcode_2002(CRunningScript* thread); // cleo_return_with OpcodeResult __stdcall opcode_2003(CRunningScript* thread); // cleo_return_fail - OpcodeResult __stdcall opcode_2004(CRunningScript* thread); // forget_memory typedef void(*FuncScriptDeleteDelegateT) (CRunningScript *script); struct ScriptDeleteDelegate { @@ -147,15 +123,6 @@ namespace CLEO ScriptDeleteDelegate scriptDeleteDelegate; void RunScriptDeleteDelegate(CRunningScript *script) { scriptDeleteDelegate(script); } - CBuildingPool **buildingPool = nullptr; // add for future CLEO releases - CVehiclePool **vehiclePool = nullptr; - CObjectPool **objectPool = nullptr; - CPedPool **pedPool = nullptr; - - inline CPedPool& GetPedPool() { return **pedPool; } - inline CVehiclePool& GetVehiclePool() { return **vehiclePool; } - inline CObjectPool& GetObjectPool() { return **objectPool; } - void(__thiscall * ProcessScript)(CRunningScript*); const char * (__cdecl * 
GetUserDirectory)(); @@ -178,6 +145,7 @@ namespace CLEO WORD CCustomOpcodeSystem::lastCustomOpcode = 0; std::string CCustomOpcodeSystem::lastErrorMsg = {}; WORD CCustomOpcodeSystem::prevOpcode = 0; + BYTE CCustomOpcodeSystem::handledParamCount = 0; // opcode handler for custom opcodes OpcodeResult __fastcall CCustomOpcodeSystem::customOpcodeHandler(CRunningScript *thread, int dummy, WORD opcode) @@ -187,6 +155,7 @@ namespace CLEO lastScript = thread; lastOpcode = opcode; lastOpcodePtr = (WORD*)thread->GetBytePointer() - 1; // rewind to the opcode start + handledParamCount = 0; // execute registered callbacks OpcodeResult result = OR_NONE; @@ -245,122 +214,9 @@ namespace CLEO return (callbackResult != OR_NONE) ? callbackResult : result; } - OpcodeResult CCustomOpcodeSystem::CallFunctionGeneric(WORD opcode, CRunningScript* thread, bool thisCall, bool returnArg) - { - void* func; *thread >> func; - void* struc = nullptr; if(thisCall) *thread >> struc; - DWORD numParams; *thread >> numParams; - DWORD stackAlign; *thread >> stackAlign; // pop - - if ((size_t)func <= CCustomOpcodeSystem::MinValidAddress) - { - SHOW_ERROR("Invalid '0x%X' function pointer param of opcode [%04X] in script %s\nScript suspended.", func, opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return thread->Suspend(); - } - - if (thisCall && (size_t)struc <= CCustomOpcodeSystem::MinValidAddress) - { - SHOW_ERROR("Invalid '0x%X' struct pointer param of opcode [%04X] in script %s\nScript suspended.", struc, opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return thread->Suspend(); - } - - int nVarArg = GetVarArgCount(thread); - if (numParams + returnArg != nVarArg) // and return argument - { - SHOW_ERROR("Opcode [%04X] declared %d input args, but provided %d in script %s\nScript suspended.", opcode, numParams, (int)nVarArg - returnArg, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return thread->Suspend(); - } - - constexpr size_t Max_Args = 32; - if (numParams > Max_Args) - { 
- SHOW_ERROR("Opcode [%04X] used with more than supported arguments in script %s\nScript suspended.", opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return thread->Suspend(); - } - - static SCRIPT_VAR arguments[Max_Args] = { 0 }; - SCRIPT_VAR* arguments_end = arguments + numParams; - - constexpr size_t Max_Text_Params = 5; - static char textParams[Max_Text_Params][MAX_STR_LEN]; - size_t currTextParam = 0; - - stackAlign *= 4; // bytes peer argument - - // retrieve parameters - for (size_t i = 0; i < numParams; i++) - { - auto& param = arguments[i]; - auto paramType = thread->PeekDataType(); - - if (IsImmInteger(paramType) || IsVariable(paramType)) - *thread >> param.dwParam; - else - if (IsImmFloat(paramType)) - *thread >> param.fParam; - else - if (IsImmString(paramType) || IsVarString(paramType)) - { - if (currTextParam >= Max_Text_Params) - { - SHOW_ERROR("Opcode [%04X] used with more than supported string arguments in script %s\nScript suspended.", opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return thread->Suspend(); - } - - param.pcParam = ReadStringParam(thread, textParams[currTextParam], MAX_STR_LEN); OPCODE_VALIDATE_STR_ARG_READ(param.pcParam) - currTextParam++; - } - else - { - SHOW_ERROR("Invalid param type (%s) in opcode [%04X] in script %s \nScript suspended.", opcode, ToKindStr(paramType), ((CCustomScript*)thread)->GetInfoStr().c_str()); - return thread->Suspend(); - } - } - - // validate return target variable - if (returnArg) - { - auto paramType = thread->PeekDataType(); - - if (!IsVariable(paramType) && !IsVarString(paramType)) - { - SHOW_ERROR("Invalid return param type (%s) in opcode [%04X] in script %s \nScript suspended.", opcode, ToKindStr(paramType), ((CCustomScript*)thread)->GetInfoStr().c_str()); - return thread->Suspend(); - } - } - - DWORD result; - _asm - { - // transfer args to stack - lea ecx, arguments - call_func_loop: - cmp ecx, arguments_end - jae call_func_loop_end - push[ecx] - add ecx, 0x4 - jmp 
call_func_loop - call_func_loop_end: - - // call function - mov ecx, struc - xor eax, eax - call func - mov result, eax // get result - add esp, stackAlign // cleanup stack - } - - if (returnArg) *thread << result; - - SkipUnusedVarArgs(thread); - return OR_CONTINUE; - } - void CCustomOpcodeSystem::FinalizeScriptObjects() { - TRACE("Cleaning up script data... %u libs, %u allocations...", - m_hNativeLibs.size(), m_pAllocations.size() - ); + TRACE("Cleaning up script data..."); for (void* func : GetInstance().GetCallbacks(eCallbackId::ScriptsFinalize)) { @@ -368,23 +224,13 @@ namespace CLEO ((callback*)func)(); } - // clean up after opcode_0AA2 - std::for_each(m_hNativeLibs.begin(), m_hNativeLibs.end(), FreeLibrary); - m_hNativeLibs.clear(); - // clean up after opcode_0AB1 ScmFunction::Clear(); - - // clean up after opcode_0AC8 - std::for_each(m_pAllocations.begin(), m_pAllocations.end(), free); - m_pAllocations.clear(); } CCustomOpcodeSystem::CCustomOpcodeSystem() { // register CLEO opcodes - CLEO_RegisterOpcode(0x0A8C, opcode_0A8C); - CLEO_RegisterOpcode(0x0A8D, opcode_0A8D); CLEO_RegisterOpcode(0x0A8E, opcode_0A8E); CLEO_RegisterOpcode(0x0A8F, opcode_0A8F); CLEO_RegisterOpcode(0x0A90, opcode_0A90); @@ -393,22 +239,9 @@ namespace CLEO CLEO_RegisterOpcode(0x0A93, opcode_0A93); CLEO_RegisterOpcode(0x0A94, opcode_0A94); CLEO_RegisterOpcode(0x0A95, opcode_0A95); - CLEO_RegisterOpcode(0x0A96, opcode_0A96); - CLEO_RegisterOpcode(0x0A97, opcode_0A97); - CLEO_RegisterOpcode(0x0A98, opcode_0A98); - CLEO_RegisterOpcode(0x0A99, opcode_0A99); - CLEO_RegisterOpcode(0x0A9F, opcode_0A9F); CLEO_RegisterOpcode(0x0AA0, opcode_0AA0); CLEO_RegisterOpcode(0x0AA1, opcode_0AA1); - CLEO_RegisterOpcode(0x0AA2, opcode_0AA2); - CLEO_RegisterOpcode(0x0AA3, opcode_0AA3); - CLEO_RegisterOpcode(0x0AA4, opcode_0AA4); - CLEO_RegisterOpcode(0x0AA5, opcode_0AA5); - CLEO_RegisterOpcode(0x0AA6, opcode_0AA6); - CLEO_RegisterOpcode(0x0AA7, opcode_0AA7); - CLEO_RegisterOpcode(0x0AA8, opcode_0AA8); 
CLEO_RegisterOpcode(0x0AA9, opcode_0AA9); - CLEO_RegisterOpcode(0x0AAA, opcode_0AAA); CLEO_RegisterOpcode(0x0AAC, opcode_0AAC); CLEO_RegisterOpcode(0x0AAD, opcode_0AAD); CLEO_RegisterOpcode(0x0AAE, opcode_0AAE); @@ -435,10 +268,6 @@ namespace CLEO CLEO_RegisterOpcode(0x0AC3, opcode_0AC3); CLEO_RegisterOpcode(0x0AC4, opcode_0AC4); CLEO_RegisterOpcode(0x0AC5, opcode_0AC5); - CLEO_RegisterOpcode(0x0AC6, opcode_0AC6); - CLEO_RegisterOpcode(0x0AC7, opcode_0AC7); - CLEO_RegisterOpcode(0x0AC8, opcode_0AC8); - CLEO_RegisterOpcode(0x0AC9, opcode_0AC9); CLEO_RegisterOpcode(0x0ACA, opcode_0ACA); CLEO_RegisterOpcode(0x0ACB, opcode_0ACB); CLEO_RegisterOpcode(0x0ACC, opcode_0ACC); @@ -459,19 +288,15 @@ namespace CLEO CLEO_RegisterOpcode(0x0AE1, opcode_0AE1); CLEO_RegisterOpcode(0x0AE2, opcode_0AE2); CLEO_RegisterOpcode(0x0AE3, opcode_0AE3); - CLEO_RegisterOpcode(0x0AE9, opcode_0AE9); - CLEO_RegisterOpcode(0x0AEA, opcode_0AEA); - CLEO_RegisterOpcode(0x0AEB, opcode_0AEB); - CLEO_RegisterOpcode(0x0AEC, opcode_0AEC); CLEO_RegisterOpcode(0x0AED, opcode_0AED); CLEO_RegisterOpcode(0x0AEE, opcode_0AEE); CLEO_RegisterOpcode(0x0AEF, opcode_0AEF); + CLEO_RegisterOpcode(0x0DD5, opcode_0DD5); // get_platform - CLEO_RegisterOpcode(0x2000, opcode_2000); // resolve_filepath - CLEO_RegisterOpcode(0x2001, opcode_2001); // get_script_filename + + // 2000, 2001 free CLEO_RegisterOpcode(0x2002, opcode_2002); // cleo_return_with CLEO_RegisterOpcode(0x2003, opcode_2003); // cleo_return_fail - CLEO_RegisterOpcode(0x2004, opcode_2004); // forget_memory } void CCustomOpcodeSystem::Inject(CCodeInjector& inj) @@ -496,9 +321,6 @@ namespace CLEO MemWrite(gvm.TranslateMemoryAddress(MA_OPCODE_HANDLER_REF), &customOpcodeHandlers); MemWrite(0x00469EF0, &customOpcodeHandlers); // TODO: game version translation - pedPool = gvm.TranslateMemoryAddress(MA_PED_POOL); - vehiclePool = gvm.TranslateMemoryAddress(MA_VEHICLE_POOL); - objectPool = gvm.TranslateMemoryAddress(MA_OBJECT_POOL); GetUserDirectory = 
gvm.TranslateMemoryAddress(MA_GET_USER_DIR_FUNCTION); ChangeToUserDir = gvm.TranslateMemoryAddress(MA_CHANGE_TO_USER_DIR_FUNCTION); ChangeToProgramDir = gvm.TranslateMemoryAddress(MA_CHANGE_TO_PROGRAM_DIR_FUNCTION); @@ -545,12 +367,6 @@ namespace CLEO inline CRunningScript& operator>>(CRunningScript& thread, DWORD& uval) { - auto paramType = (eDataType)*thread.GetBytePointer(); - if (!IsImmInteger(paramType) && !IsVariable(paramType)) // TODO: it is possible to differentiate between int/float arrays - { - LOG_WARNING(&thread, "Reading integer argument, got %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); - } - GetScriptParams(&thread, 1); uval = opcodeParams[0].dwParam; return thread; @@ -558,12 +374,6 @@ namespace CLEO inline CRunningScript& operator<<(CRunningScript& thread, DWORD uval) { - auto paramType = (eDataType)*thread.GetBytePointer(); - if (!IsVariable(paramType)) // TODO: it is possible to differentiate between int/float arrays - { - LOG_WARNING(&thread, "Writing integer, got argument type %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); - } - opcodeParams[0].dwParam = uval; SetScriptParams(&thread, 1); return thread; @@ -571,12 +381,6 @@ namespace CLEO inline CRunningScript& operator>>(CRunningScript& thread, int& nval) { - auto paramType = (eDataType)*thread.GetBytePointer(); - if (!IsImmInteger(paramType) && !IsVariable(paramType)) // TODO: it is possible to differentiate between int/float arrays - { - LOG_WARNING(&thread, "Reading integer argument, got %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); - } - GetScriptParams(&thread, 1); nval = opcodeParams[0].nParam; return thread; @@ -584,12 +388,6 @@ namespace CLEO inline CRunningScript& operator<<(CRunningScript& thread, int nval) { - auto paramType = (eDataType)*thread.GetBytePointer(); - if (!IsVariable(paramType)) // TODO: it is possible to differentiate between int/float 
arrays - { - LOG_WARNING(&thread, "Writing integer, got argument type %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); - } - opcodeParams[0].nParam = nval; SetScriptParams(&thread, 1); return thread; @@ -597,12 +395,6 @@ namespace CLEO inline CRunningScript& operator>>(CRunningScript& thread, float& fval) { - auto paramType = (eDataType)*thread.GetBytePointer(); - if (!IsImmFloat(paramType) && !IsVariable(paramType)) // TODO: it is possible to differentiate between int/float arrays - { - LOG_WARNING(&thread, "Reading float argument, got %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); - } - GetScriptParams(&thread, 1); fval = opcodeParams[0].fParam; return thread; @@ -610,35 +402,11 @@ namespace CLEO inline CRunningScript& operator<<(CRunningScript& thread, float fval) { - auto paramType = (eDataType)*thread.GetBytePointer(); - if (!IsVariable(paramType)) // TODO: it is possible to differentiate between int/float arrays - { - LOG_WARNING(&thread, "Writing float, got argument type %s in script %s", ToKindStr(paramType), ((CCustomScript*)&thread)->GetInfoStr().c_str()); - } - opcodeParams[0].fParam = fval; SetScriptParams(&thread, 1); return thread; } - inline CRunningScript& operator>>(CRunningScript& thread, CVector& vec) - { - GetScriptParams(&thread, 3); - vec.x = opcodeParams[0].fParam; - vec.y = opcodeParams[1].fParam; - vec.z = opcodeParams[2].fParam; - return thread; - } - - inline CRunningScript& operator<<(CRunningScript& thread, const CVector& vec) - { - opcodeParams[0].fParam = vec.x; - opcodeParams[1].fParam = vec.y; - opcodeParams[2].fParam = vec.z; - SetScriptParams(&thread, 3); - return thread; - } - template inline CRunningScript& operator>>(CRunningScript& thread, T *& pval) { @@ -1187,71 +955,6 @@ namespace CLEO /* Opcode definitions */ /************************************************************************/ - //0A8C=4,write_memory %1d% size %2d% value %3d% 
virtual_protect %4d% - OpcodeResult __stdcall opcode_0A8C(CRunningScript *thread) - { - GetScriptParams(thread, 4); - void *address = opcodeParams[0].pParam; - DWORD size = opcodeParams[1].dwParam; - DWORD value = opcodeParams[2].dwParam; - bool vp = opcodeParams[3].bParam; - - if ((size_t)address <= CCustomOpcodeSystem::MinValidAddress) - { - SHOW_ERROR("Invalid '0x%X' pointer param of opcode [0A8C] in script %s\nScript suspended.", address, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return thread->Suspend(); - } - - switch (size) - { - default: - GetInstance().CodeInjector.MemoryWrite(address, (BYTE)value, vp, size); - break; - case 2: - GetInstance().CodeInjector.MemoryWrite(address, (WORD)value, vp); - break; - case 4: - GetInstance().CodeInjector.MemoryWrite(address, (DWORD)value, vp); - break; - } - return OR_CONTINUE; - } - - //0A8D=4,%4d% = read_memory %1d% size %2d% virtual_protect %3d% - OpcodeResult __stdcall opcode_0A8D(CRunningScript *thread) - { - GetScriptParams(thread, 3); - void *address = opcodeParams[0].pParam; - DWORD size = opcodeParams[1].dwParam; - bool vp = opcodeParams[2].bParam; - - if ((size_t)address <= CCustomOpcodeSystem::MinValidAddress) - { - SHOW_ERROR("Invalid '0x%X' pointer param of opcode [0A8D] in script %s\nScript suspended.", address, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return thread->Suspend(); - } - - opcodeParams[0].dwParam = 0; - switch (size) - { - case 1: - GetInstance().CodeInjector.MemoryRead(address, (BYTE)opcodeParams[0].ucParam, vp); - break; - case 2: - GetInstance().CodeInjector.MemoryRead(address, (WORD)opcodeParams[0].usParam, vp); - break; - case 4: - GetInstance().CodeInjector.MemoryRead(address, (DWORD)opcodeParams[0].dwParam, vp); - break; - default: - SHOW_ERROR("Invalid size param '%d' of opcode [0A8D] in script %s\nScript suspended.", size, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return thread->Suspend(); - } - - SetScriptParams(thread, 1); - return OR_CONTINUE; - } - 
//0A8E=3,%3d% = %1d% + %2d% ; int OpcodeResult __stdcall opcode_0A8E(CRunningScript *thread) { @@ -1314,7 +1017,7 @@ namespace CLEO return OR_CONTINUE; } - //0A93=0,end_custom_thread + //0A93=0,terminate_this_custom_script OpcodeResult __stdcall opcode_0A93(CRunningScript *thread) { CCustomScript *cs = reinterpret_cast(thread); @@ -1366,76 +1069,6 @@ namespace CLEO return OR_CONTINUE; } - //0A96=2,%2d% = actor %1d% struct - OpcodeResult __stdcall opcode_0A96(CRunningScript *thread) - { - DWORD handle; - *thread >> handle; - *thread << GetPedPool().GetAtRef(handle); - return OR_CONTINUE; - } - - //0A97=2,%2d% = car %1d% struct - OpcodeResult __stdcall opcode_0A97(CRunningScript *thread) - { - DWORD handle; - *thread >> handle; - *thread << GetVehiclePool().GetAtRef(handle); - return OR_CONTINUE; - } - - //0A98=2,%2d% = object %1d% struct - OpcodeResult __stdcall opcode_0A98(CRunningScript *thread) - { - DWORD handle; - *thread >> handle; - *thread << GetObjectPool().GetAtRef(handle); - return OR_CONTINUE; - } - - //0A99=1,chdir %1b:userdir/rootdir% - OpcodeResult __stdcall opcode_0A99(CRunningScript *thread) - { - auto paramType = *thread->GetBytePointer(); - if (paramType == DT_BYTE || - paramType == DT_WORD || - paramType == DT_DWORD || - paramType == DT_VAR || - paramType == DT_LVAR || - paramType == DT_VAR_ARRAY || - paramType == DT_LVAR_ARRAY) - { - // numbered predefined paths - DWORD param; *thread >> param; - - const char* path; - switch(param) - { - case 0: path = DIR_GAME; break; - case 1: path = DIR_USER; break; - case 2: path = DIR_SCRIPT; break; - default: - LOG_WARNING(0, "Value (%d) not known by opcode [0A99] in script %s", param, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return OR_CONTINUE; - } - - reinterpret_cast(thread)->SetWorkDir(path); - } - else - { - auto path = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(path) - reinterpret_cast(thread)->SetWorkDir(path); - } - return OR_CONTINUE; - } - - //0A9F=1,%1d% = 
current_thread_pointer - OpcodeResult __stdcall opcode_0A9F(CRunningScript *thread) - { - *thread << thread; - return OR_CONTINUE; - } - //0AA0=1,gosub_if_false %1p% OpcodeResult __stdcall opcode_0AA0(CRunningScript *thread) { @@ -1455,68 +1088,6 @@ namespace CLEO return OR_CONTINUE; } - //0AA2=2,%2h% = load_library %1d% // IF and SET - OpcodeResult __stdcall opcode_0AA2(CRunningScript *thread) - { - auto path = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(path) - - auto filename = reinterpret_cast(thread)->ResolvePath(path); - - auto libHandle = LoadLibrary(filename.c_str()); - *thread << libHandle; - SetScriptCondResult(thread, libHandle != nullptr); - if (libHandle) GetInstance().OpcodeSystem.m_hNativeLibs.insert(libHandle); - - return OR_CONTINUE; - } - - //0AA3=1,free_library %1h% - OpcodeResult __stdcall opcode_0AA3(CRunningScript *thread) - { - HMODULE libHandle; - *thread >> libHandle; - FreeLibrary(libHandle); - GetInstance().OpcodeSystem.m_hNativeLibs.erase(libHandle); - return OR_CONTINUE; - } - - //0AA4=3,%3d% = get_proc_address %1d% library %2d% // IF and SET - OpcodeResult __stdcall opcode_0AA4(CRunningScript *thread) - { - auto funcName = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(funcName) - - HMODULE libHandle; - *thread >> libHandle; - void *funcAddr = (void *)GetProcAddress(libHandle, funcName); - *thread << funcAddr; - SetScriptCondResult(thread, funcAddr != nullptr); - return OR_CONTINUE; - } - - //0AA5=-1,call %1d% num_params %2h% pop %3h% - OpcodeResult __stdcall opcode_0AA5(CRunningScript *thread) - { - return CCustomOpcodeSystem::CallFunctionGeneric(0x0AA5, thread, false, false); - } - - //0AA6=-1,call_method %1d% struct %2d% num_params %3h% pop %4h% - OpcodeResult __stdcall opcode_0AA6(CRunningScript *thread) - { - return CCustomOpcodeSystem::CallFunctionGeneric(0x0AA6, thread, true, false); - } - - //0AA7=-1,call_function_return %1d% num_params %2h% pop %3h% - OpcodeResult __stdcall opcode_0AA7(CRunningScript *thread) 
- { - return CCustomOpcodeSystem::CallFunctionGeneric(0x0AA7, thread, false, true); - } - - //0AA8=-1,call_method_return %1d% struct %2d% num_params %3h% pop %4h% - OpcodeResult __stdcall opcode_0AA8(CRunningScript *thread) - { - return CCustomOpcodeSystem::CallFunctionGeneric(0x0AA8, thread, true, true); - } - //0AA9=0, is_game_version_original OpcodeResult __stdcall opcode_0AA9(CRunningScript *thread) { @@ -1526,18 +1097,6 @@ namespace CLEO return OR_CONTINUE; } - //0AAA=2, %2d% = thread %1d% pointer // IF and SET - OpcodeResult __stdcall opcode_0AAA(CRunningScript *thread) - { - auto threadName = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(threadName) - threadName[7] = '\0'; - CRunningScript *cs = GetInstance().ScriptEngine.FindCustomScriptNamed(threadName); - if (!cs) cs = GetInstance().ScriptEngine.FindScriptNamed(threadName); - *thread << cs; - SetScriptCondResult(thread, cs != nullptr); - return OR_CONTINUE; - } - //0AAC=2, %2d% = load_audiostream %1d% // IF and SET OpcodeResult __stdcall opcode_0AAC(CRunningScript *thread) { @@ -1780,30 +1339,63 @@ namespace CLEO return GetInstance().OpcodeSystem.CleoReturnGeneric(0x0AB2, thread, true, returnParamCount); } - //0AB3=2,var %1d% = %2d% + //0AB3=2,set_cleo_shared_var %1d% = %2d% OpcodeResult __stdcall opcode_0AB3(CRunningScript *thread) { - DWORD varId, value; - *thread >> varId >> value; - GetInstance().ScriptEngine.CleoVariables[varId].dwParam = value; + auto varIdx = OPCODE_READ_PARAM_INT(); + + const auto VarCount = _countof(CScriptEngine::CleoVariables); + if (varIdx < 0 || varIdx >= VarCount) + { + SHOW_ERROR("Variable index '%d' out of supported range in script %s\nScript suspended.", varIdx, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return thread->Suspend(); + } + + auto paramType = thread->PeekDataType(); + if (!IsImmInteger(paramType) && + !IsImmFloat(paramType) && + !IsVariable(paramType)) + { + SHOW_ERROR("Invalid value type (%s) in script %s \nScript suspended.", 
ToKindStr(paramType), ((CCustomScript*)thread)->GetInfoStr().c_str()); + return thread->Suspend(); + } + + GetScriptParams(thread, 1); + GetInstance().ScriptEngine.CleoVariables[varIdx].dwParam = opcodeParams[0].dwParam; return OR_CONTINUE; } - //0AB4=2,%2d% = var %1d% + //0AB4=2,%2d% = get_cleo_shared_var %1d% OpcodeResult __stdcall opcode_0AB4(CRunningScript *thread) { - DWORD varId; - *thread >> varId; - *thread << GetInstance().ScriptEngine.CleoVariables[varId].dwParam; + auto varIdx = OPCODE_READ_PARAM_INT(); + + const auto VarCount = _countof(CScriptEngine::CleoVariables); + if (varIdx < 0 || varIdx >= VarCount) + { + SHOW_ERROR("Variable index '%d' out of supported range in script %s\nScript suspended.", varIdx, ((CCustomScript*)thread)->GetInfoStr().c_str()); + return thread->Suspend(); + } + + auto paramType = thread->PeekDataType(); + if (!IsVariable(paramType)) + { + SHOW_ERROR("Invalid result argument type (%s) in script %s \nScript suspended.", ToKindStr(paramType), ((CCustomScript*)thread)->GetInfoStr().c_str()); + return thread->Suspend(); + } + + opcodeParams[0].dwParam = GetInstance().ScriptEngine.CleoVariables[varIdx].dwParam; + CLEO_RecordOpcodeParams(thread, 1); return OR_CONTINUE; } //0AB5=3,store_actor %1d% closest_vehicle_to %2d% closest_ped_to %3d% OpcodeResult __stdcall opcode_0AB5(CRunningScript *thread) { - DWORD actor; - *thread >> actor; - auto pPlayerPed = GetPedPool().GetAtRef(actor); + auto handle = OPCODE_READ_PARAM_PED_HANDLE(); + + auto pPlayerPed = CPools::GetPed(handle); + CPedIntelligence * pedintel; if (pPlayerPed && (pedintel = pPlayerPed->m_pIntelligence)) { @@ -1825,7 +1417,7 @@ namespace CLEO pPed = nullptr; } - *thread << (pVehicle ? GetVehiclePool().GetRef(pVehicle) : -1) << (pPed ? GetPedPool().GetRef(pPed) : -1); + *thread << (pVehicle ? CPools::GetVehicleRef(pVehicle) : -1) << (pPed ? 
CPools::GetPedRef(pPed) : -1); } else *thread << -1 << -1; return OR_CONTINUE; @@ -1842,13 +1434,16 @@ namespace CLEO { CVector coords(pMarker->m_vecPos); coords.z = FindGroundZ(coords.x, coords.y); - *thread << coords; - SetScriptCondResult(thread, true); + + OPCODE_WRITE_PARAM_FLOAT(coords.x); + OPCODE_WRITE_PARAM_FLOAT(coords.y); + OPCODE_WRITE_PARAM_FLOAT(coords.z); + OPCODE_CONDITION_RESULT(true); } else { - GetScriptParams(thread, 3); - SetScriptCondResult(thread, false); + OPCODE_SKIP_PARAMS(3); + OPCODE_CONDITION_RESULT(false); } return OR_CONTINUE; @@ -1857,18 +1452,24 @@ namespace CLEO //0AB7=2,get_vehicle %1d% number_of_gears_to %2d% OpcodeResult __stdcall opcode_0AB7(CRunningScript *thread) { - DWORD hVehicle; - *thread >> hVehicle; - *thread << GetVehiclePool().GetAtRef(hVehicle)->m_pHandlingData->m_transmissionData.m_nNumberOfGears; + auto handle = OPCODE_READ_PARAM_VEHICLE_HANDLE(); + + auto vehicle = CPools::GetVehicle(handle); + auto gears = vehicle->m_pHandlingData->m_transmissionData.m_nNumberOfGears; + + OPCODE_WRITE_PARAM_INT(gears); return OR_CONTINUE; } //0AB8=2,get_vehicle %1d% current_gear_to %2d% OpcodeResult __stdcall opcode_0AB8(CRunningScript *thread) { - DWORD hVehicle; - *thread >> hVehicle; - *thread << GetVehiclePool().GetAtRef(hVehicle)->m_nCurrentGear; + auto handle = OPCODE_READ_PARAM_VEHICLE_HANDLE(); + + auto vehicle = CPools::GetVehicle(handle); + auto gear = vehicle->m_nCurrentGear; + + OPCODE_WRITE_PARAM_INT(gear); return OR_CONTINUE; } @@ -1886,7 +1487,7 @@ namespace CLEO { auto threadName = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(threadName) - auto deleted_thread = GetInstance().ScriptEngine.FindCustomScriptNamed(threadName); + auto deleted_thread = (CCustomScript*)GetInstance().ScriptEngine.FindScriptNamed(threadName, false, true, 0); if (deleted_thread) { GetInstance().ScriptEngine.RemoveCustomScript(deleted_thread); @@ -1916,29 +1517,36 @@ namespace CLEO //0ABD=1, vehicle %1d% siren_on OpcodeResult 
__stdcall opcode_0ABD(CRunningScript *thread) { - DWORD hVehicle; - *thread >> hVehicle; - SetScriptCondResult(thread, GetVehiclePool().GetAtRef(hVehicle)->m_nVehicleFlags.bSirenOrAlarm); + auto handle = OPCODE_READ_PARAM_VEHICLE_HANDLE(); + + auto vehicle = CPools::GetVehicle(handle); + auto state = vehicle->m_nVehicleFlags.bSirenOrAlarm; + + OPCODE_CONDITION_RESULT(state); return OR_CONTINUE; } //0ABE=1, vehicle %1d% engine_on OpcodeResult __stdcall opcode_0ABE(CRunningScript *thread) { - DWORD hVehicle; - *thread >> hVehicle; - SetScriptCondResult(thread, GetVehiclePool().GetAtRef(hVehicle)->m_nVehicleFlags.bEngineOn); + auto handle = OPCODE_READ_PARAM_VEHICLE_HANDLE(); + + auto vehicle = CPools::GetVehicle(handle); + auto state = vehicle->m_nVehicleFlags.bEngineOn; + + OPCODE_CONDITION_RESULT(state); return OR_CONTINUE; } //0ABF=2,set_vehicle %1d% engine_state_to %2d% OpcodeResult __stdcall opcode_0ABF(CRunningScript *thread) { - DWORD hVehicle, - state; - *thread >> hVehicle >> state; - auto veh = GetVehiclePool().GetAtRef(hVehicle); - veh->m_nVehicleFlags.bEngineOn = state != false; + auto handle = OPCODE_READ_PARAM_VEHICLE_HANDLE(); + auto state = OPCODE_READ_PARAM_BOOL(); + + auto vehicle = CPools::GetVehicle(handle); + + vehicle->m_nVehicleFlags.bEngineOn = state != false; return OR_CONTINUE; } @@ -1966,101 +1574,49 @@ namespace CLEO //0AC2=4,set_3d_audiostream %1d% position %2d% %3d% %4d% OpcodeResult __stdcall opcode_0AC2(CRunningScript *thread) { - CAudioStream *stream; + auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); CVector pos; - *thread >> stream >> pos; - if (stream) stream->Set3dPosition(pos); + pos.x = OPCODE_READ_PARAM_FLOAT(); + pos.y = OPCODE_READ_PARAM_FLOAT(); + pos.z = OPCODE_READ_PARAM_FLOAT(); + + stream->Set3dPosition(pos); return OR_CONTINUE; } //0AC3=2,link_3d_audiostream %1d% to_object %2d% OpcodeResult __stdcall opcode_0AC3(CRunningScript *thread) { - CAudioStream *stream; - DWORD handle; - *thread >> stream >> handle; - if 
(stream) stream->Link(GetObjectPool().GetAtRef(handle)); - return OR_CONTINUE; - } + auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); + auto handle = OPCODE_READ_PARAM_OBJECT_HANDLE(); - //0AC4=2,link_3d_audiostream %1d% to_actor %2d% - OpcodeResult __stdcall opcode_0AC4(CRunningScript *thread) - { - CAudioStream *stream; - DWORD handle; - *thread >> stream >> handle; - if (stream) stream->Link(GetPedPool().GetAtRef(handle)); - return OR_CONTINUE; - } + auto object = CPools::GetObject(handle); - //0AC5=2,link_3d_audiostream %1d% to_vehicle %2d% - OpcodeResult __stdcall opcode_0AC5(CRunningScript *thread) - { - CAudioStream *stream; - DWORD handle; - *thread >> stream >> handle; - if (stream) stream->Link(GetVehiclePool().GetAtRef(handle)); + stream->Link(object); return OR_CONTINUE; } - //0AC6=2,%2d% = label %1p% offset - OpcodeResult __stdcall opcode_0AC6(CRunningScript *thread) - { - int label; - *thread >> label; - *thread << (label < 0 ? thread->GetBasePointer() - label : scmBlock + label); - return OR_CONTINUE; - } - - //0AC7=2,%2d% = var %1d% offset - OpcodeResult __stdcall opcode_0AC7(CRunningScript *thread) - { - *thread << GetScriptParamPointer(thread); - return OR_CONTINUE; - } - - //0AC8=2,%2d% = allocate_memory_size %1d% - OpcodeResult __stdcall opcode_0AC8(CRunningScript *thread) + //0AC4=2,link_3d_audiostream %1d% to_actor %2d% + OpcodeResult __stdcall opcode_0AC4(CRunningScript *thread) { - DWORD size; *thread >> size; - - void* mem = calloc(size, 1); - if (mem) - { - DWORD oldProtect; - VirtualProtect(mem, size, PAGE_EXECUTE_READWRITE, &oldProtect); + auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); + auto handle = OPCODE_READ_PARAM_PED_HANDLE(); - GetInstance().OpcodeSystem.m_pAllocations.insert(mem); - } - else - LOG_WARNING(thread, "[0AC8] failed to allocate %d bytes of memory in script %s", size, ((CCustomScript*)thread)->GetInfoStr().c_str()); + auto ped = CPools::GetPed(handle); - *thread << mem; - SetScriptCondResult(thread, mem != 
nullptr); + stream->Link(ped); return OR_CONTINUE; } - //0AC9=1,free_allocated_memory %1d% - OpcodeResult __stdcall opcode_0AC9(CRunningScript *thread) + //0AC5=2,link_3d_audiostream %1d% to_vehicle %2d% + OpcodeResult __stdcall opcode_0AC5(CRunningScript *thread) { - void *mem; *thread >> mem; + auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); + auto handle = OPCODE_READ_PARAM_VEHICLE_HANDLE(); - if ((size_t)mem <= CCustomOpcodeSystem::MinValidAddress) - { - SHOW_ERROR("[0AC9] used with invalid '0x%X' pointer argument in script %s\nScript suspended.", mem, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return thread->Suspend(); - } + auto vehicle = CPools::GetVehicle(handle); - // allocated with 0AC8 - auto & allocs = GetInstance().OpcodeSystem.m_pAllocations; - if (allocs.find(mem) != allocs.end()) - { - free(mem); - allocs.erase(mem); - return OR_CONTINUE; // done - } - - LOG_WARNING(thread, "[0AC9] used with pointer to unknown or already freed memory in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); + stream->Link(vehicle); return OR_CONTINUE; } @@ -2150,20 +1706,22 @@ namespace CLEO //0AD2=2, %2d% = player %1d% targeted_actor //IF and SET OpcodeResult __stdcall opcode_0AD2(CRunningScript *thread) { - DWORD playerId; - *thread >> playerId; - auto pPlayerPed = GetPlayerPed(playerId); + auto playerId = OPCODE_READ_PARAM_INT(); + + auto pPlayerPed = GetPlayerPed(playerId); // TODO: use plugin SDK instead auto pTargetEntity = GetWeaponTarget(pPlayerPed); + if (!pTargetEntity) pTargetEntity = (CEntity*)pPlayerPed->m_pPlayerTargettedPed; if (pTargetEntity && pTargetEntity->m_nType == ENTITY_TYPE_PED) { - *thread << GetPedPool().GetRef(reinterpret_cast(pTargetEntity)); - SetScriptCondResult(thread, true); + auto handle = CPools::GetPedRef(reinterpret_cast(pTargetEntity)); + OPCODE_WRITE_PARAM_INT(handle); + OPCODE_CONDITION_RESULT(true); } else { - *thread << -1; - SetScriptCondResult(thread, false); + OPCODE_WRITE_PARAM_INT(-1); + 
OPCODE_CONDITION_RESULT(false); } return OR_CONTINUE; } @@ -2186,21 +1744,29 @@ namespace CLEO char fmt[MAX_STR_LEN]; auto format = ReadStringParam(thread, fmt, sizeof(fmt)); OPCODE_VALIDATE_STR_ARG_READ(format) - size_t cExParams = 0; + auto resultType = thread->PeekDataType(); + if (!IsVariable(resultType) && IsVarString(resultType)) + { + SHOW_ERROR("Result parameter must be variable type, received '%s' in script %s \nScript suspended.", ToKindStr(resultType), ((CCustomScript*)thread)->GetInfoStr().c_str()); + return thread->Suspend(); + } int *result = (int *)GetScriptParamPointer(thread); - SCRIPT_VAR *ExParams[35]; // read extra params + size_t cExParams = 0; + SCRIPT_VAR *ExParams[35]; for (int i = 0; i < 35; i++) { - if (*thread->GetBytePointer()) + auto paramType = thread->PeekDataType(); + if (paramType != DT_END) { ExParams[i] = GetScriptParamPointer(thread); cExParams++; } - else ExParams[i] = nullptr; + else ExParams[i] = nullptr; // clear unused args } - thread->IncPtr(); + SkipUnusedVarArgs(thread); // and var args terminator + *result = sscanf(src, format, /* extra parameters (will be aligned automatically, but the limit of 35 elements maximum exists) */ ExParams[0], ExParams[1], ExParams[2], ExParams[3], ExParams[4], ExParams[5], @@ -2214,24 +1780,20 @@ namespace CLEO return OR_CONTINUE; } - //0ADB=2,%2d% = car_model %1o% name + //0ADB=2,%2d% = car_model %1d% name OpcodeResult __stdcall opcode_0ADB(CRunningScript *thread) { - DWORD mi; - char *buf; - *thread >> mi; + DWORD modelIndex; *thread >> modelIndex; CVehicleModelInfo* model; // if 1.0 US, prefer GetModelInfo function — makes it compatible with fastman92's limit adjuster - if (CLEO::GetInstance().VersionManager.GetGameVersion() == CLEO::GV_US10) { - model = plugin::CallAndReturn(mi); - } - else { - model = reinterpret_cast(Models[mi]); - } - if (*thread->GetBytePointer() >= 1 && *thread->GetBytePointer() <= 8) *thread >> buf; - else buf = (char *)GetScriptParamPointer(thread); - memcpy(buf, 
model->m_szGameName, 8); + if (CLEO::GetInstance().VersionManager.GetGameVersion() == CLEO::GV_US10) + model = plugin::CallAndReturn(modelIndex); + else + model = reinterpret_cast(Models[modelIndex]); + + auto str = std::string(std::string_view(model->m_szGameName, sizeof(model->m_szGameName))); // to proper cstr + WriteStringParam(thread, str.c_str()); return OR_CONTINUE; } @@ -2299,24 +1861,28 @@ namespace CLEO //0AE1=7,%7d% = find_actor_near_point %1d% %2d% %3d% in_radius %4d% find_next %5h% pass_deads %6h% //IF and SET OpcodeResult __stdcall opcode_0AE1(CRunningScript *thread) { - CVector center; - float radius; - DWORD next, pass_deads; + CVector center = {}; + center.x = OPCODE_READ_PARAM_FLOAT(); + center.y = OPCODE_READ_PARAM_FLOAT(); + center.z = OPCODE_READ_PARAM_FLOAT(); + auto radius = OPCODE_READ_PARAM_FLOAT(); + auto findNext = OPCODE_READ_PARAM_BOOL(); + auto passDead = OPCODE_READ_PARAM_INT(); + static DWORD stat_last_found = 0; - auto& pool = GetPedPool(); - *thread >> center >> radius >> next >> pass_deads; + auto& pool = *CPools::ms_pPedPool; DWORD& last_found = reinterpret_cast(thread)->IsCustom() ? 
reinterpret_cast(thread)->GetLastSearchPed() : stat_last_found; - if (!next) last_found = 0; + if (!findNext) last_found = 0; for (int index = last_found; index < pool.m_nSize; ++index) { if (auto obj = pool.GetAt(index)) { - if (pass_deads != -1 && (obj->IsPlayer() || (pass_deads && !IsAvailable(obj))/* || obj->GetOwner() == 2*/ || obj->m_nPedFlags.bFadeOut)) + if (passDead != -1 && (obj->IsPlayer() || (passDead && !IsAvailable(obj))/* || obj->GetOwner() == 2*/ || obj->m_nPedFlags.bFadeOut)) continue; if (radius >= 1000.0f || (VectorSqrMagnitude(obj->GetPosition() - center) <= radius * radius)) @@ -2325,41 +1891,45 @@ namespace CLEO //if(last_found >= (unsigned)pool.GetSize()) last_found = 0; //obj->PedCreatedBy = 2; // add reference to found actor - *thread << pool.GetRef(obj); - SetScriptCondResult(thread, true); + auto found = pool.GetRef(obj); + OPCODE_WRITE_PARAM_INT(found); + OPCODE_CONDITION_RESULT(true); return OR_CONTINUE; } } } - *thread << -1; last_found = 0; - SetScriptCondResult(thread, false); + OPCODE_WRITE_PARAM_INT(-1); + OPCODE_CONDITION_RESULT(false); return OR_CONTINUE; } //0AE2=7,%7d% = find_vehicle_near_point %1d% %2d% %3d% in_radius %4d% find_next %5h% pass_wrecked %6h% //IF and SET OpcodeResult __stdcall opcode_0AE2(CRunningScript *thread) { - CVector center; - float radius; - DWORD next, pass_wrecked; - static DWORD stat_last_found = 0; + CVector center = {}; + center.x = OPCODE_READ_PARAM_FLOAT(); + center.y = OPCODE_READ_PARAM_FLOAT(); + center.z = OPCODE_READ_PARAM_FLOAT(); + auto radius = OPCODE_READ_PARAM_FLOAT(); + auto findNext = OPCODE_READ_PARAM_BOOL(); + auto passWreck = OPCODE_READ_PARAM_INT(); - auto& pool = GetVehiclePool(); - *thread >> center >> radius >> next >> pass_wrecked; + static DWORD stat_last_found = 0; + auto& pool = *CPools::ms_pVehiclePool; DWORD& last_found = reinterpret_cast(thread)->IsCustom() ? 
reinterpret_cast(thread)->GetLastSearchVehicle() : stat_last_found; - if (!next) last_found = 0; + if (!findNext) last_found = 0; for (int index = last_found; index < pool.m_nSize; ++index) { if (auto obj = pool.GetAt(index)) { - if ((pass_wrecked && IsWrecked(obj)) || (/*obj->GetOwner() == 2 ||*/ obj->m_nVehicleFlags.bFadeOut)) + if ((passWreck && IsWrecked(obj)) || (/*obj->GetOwner() == 2 ||*/ obj->m_nVehicleFlags.bFadeOut)) continue; if (radius >= 1000.0f || (VectorSqrMagnitude(obj->GetPosition() - center) <= radius * radius)) @@ -2367,33 +1937,37 @@ namespace CLEO last_found = index + 1; // on next opcode call start search from next index //if(last_found >= (unsigned)pool.GetSize()) last_found = 0; // obj.referenceType = 2; // add reference to found actor - *thread << pool.GetRef(obj); - SetScriptCondResult(thread, true); - return OR_CONTINUE; + + auto found = pool.GetRef(obj); + OPCODE_WRITE_PARAM_INT(found); + OPCODE_CONDITION_RESULT(true); } } } - *thread << -1; last_found = 0; - SetScriptCondResult(thread, false); + OPCODE_WRITE_PARAM_INT(-1); + OPCODE_CONDITION_RESULT(false); return OR_CONTINUE; } //0AE3=6,%6d% = find_object_near_point %1d% %2d% %3d% in_radius %4d% find_next %5h% //IF and SET OpcodeResult __stdcall opcode_0AE3(CRunningScript *thread) { - CVector center; - float radius; - DWORD next; + CVector center = {}; + center.x = OPCODE_READ_PARAM_FLOAT(); + center.y = OPCODE_READ_PARAM_FLOAT(); + center.z = OPCODE_READ_PARAM_FLOAT(); + auto radius = OPCODE_READ_PARAM_FLOAT(); + auto findNext = OPCODE_READ_PARAM_BOOL(); + static DWORD stat_last_found = 0; - auto& pool = GetObjectPool(); - *thread >> center >> radius >> next; + auto& pool = *CPools::ms_pObjectPool; auto cs = reinterpret_cast(thread); DWORD& last_found = cs->IsCustom() ? 
cs->GetLastSearchObject() : stat_last_found; - if (!next) last_found = 0; + if (!findNext) last_found = 0; for (int index = last_found; index < pool.m_nSize; ++index) { @@ -2406,53 +1980,17 @@ namespace CLEO last_found = index + 1; // on next opcode call start search from next index //if(last_found >= (unsigned)pool.GetSize()) last_found = 0; // obj.referenceType = 2; // add reference to found actor - *thread << pool.GetRef(obj); - SetScriptCondResult(thread, true); - return OR_CONTINUE; + + auto found = pool.GetRef(obj); + OPCODE_WRITE_PARAM_INT(found); + OPCODE_CONDITION_RESULT(true); } } } last_found = 0; - *thread << -1; - SetScriptCondResult(thread, false); - return OR_CONTINUE; - } - - //0AE9=0,pop_float - OpcodeResult __stdcall opcode_0AE9(CRunningScript *thread) - { - float result; - _asm fstp result - opcodeParams[0].fParam = result; - SetScriptParams(thread, 1); - return OR_CONTINUE; - } - - //0AEA=2,%2d% = actor_struct %1d% handle - OpcodeResult __stdcall opcode_0AEA(CRunningScript *thread) - { - CPed *struc; - *thread >> struc; - *thread << GetPedPool().GetRef(struc); - return OR_CONTINUE; - } - - //0AEB=2,%2d% = car_struct %1d% handle - OpcodeResult __stdcall opcode_0AEB(CRunningScript *thread) - { - CVehicle *struc; - *thread >> struc; - *thread << GetVehiclePool().GetRef(struc); - return OR_CONTINUE; - } - - //0AEC=2,%2d% = object_struct %1d% handle - OpcodeResult __stdcall opcode_0AEC(CRunningScript *thread) - { - CObject *struc; - *thread >> struc; - *thread << GetObjectPool().GetRef(struc); + OPCODE_WRITE_PARAM_INT(-1); + OPCODE_CONDITION_RESULT(false); return OR_CONTINUE; } @@ -2493,62 +2031,6 @@ namespace CLEO return OR_CONTINUE; } - //2000=2,%2s% = resolve_filepath %1s% - OpcodeResult __stdcall opcode_2000(CRunningScript* thread) - { - auto path = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(path) - auto resolved = reinterpret_cast(thread)->ResolvePath(path); - auto ok = WriteStringParam(thread, resolved.c_str()); 
OPCODE_VALIDATE_STR_ARG_WRITE(ok) - return OR_CONTINUE; - } - - //2001=3,%3s% = get_script_filename %1d% full_path %2d% // IF and SET - OpcodeResult __stdcall opcode_2001(CRunningScript* thread) - { - CCustomScript* script; - DWORD fullPath; - *thread >> script >> fullPath; - - if((int)script == -1) - { - script = (CCustomScript*)thread; // current script - } - else - { - if(!GetInstance().ScriptEngine.IsValidScriptPtr(script)) - { - CLEO_SkipOpcodeParams(thread, 1); // no result text - SetScriptCondResult(thread, false); // invalid input param - return OR_CONTINUE; - } - } - - if(fullPath != 0) - { - const size_t len = - strlen(script->GetScriptFileDir()) + - 1 + // path separator - strlen(script->GetScriptFileName()); - - std::string path; - path.reserve(len); - - path = script->GetScriptFileDir(); - path.push_back('\\'); - path.append(script->GetScriptFileName()); - path = script->ResolvePath(path.c_str()); // real absolute path - - auto ok = WriteStringParam(thread, path.c_str()); OPCODE_VALIDATE_STR_ARG_WRITE(ok) - } - else - { - auto ok = WriteStringParam(thread, script->GetScriptFileName()); OPCODE_VALIDATE_STR_ARG_WRITE(ok) - } - - SetScriptCondResult(thread, true); - return OR_CONTINUE; - } - //2002=-1, cleo_return_with ... 
OpcodeResult __stdcall opcode_2002(CRunningScript* thread) { @@ -2579,29 +2061,6 @@ namespace CLEO SetScriptCondResult(thread, false); return GetInstance().OpcodeSystem.CleoReturnGeneric(0x2003, thread); } - - //2004=1,forget_memory %1d% - OpcodeResult __stdcall opcode_2004(CRunningScript* thread) - { - void* mem; *thread >> mem; - - if ((size_t)mem <= CCustomOpcodeSystem::MinValidAddress) - { - SHOW_ERROR("[2004] used with invalid '0x%X' pointer argument in script %s\nScript suspended.", mem, ((CCustomScript*)thread)->GetInfoStr().c_str()); - return thread->Suspend(); - } - - // allocated with 0AC8 - auto& allocs = GetInstance().OpcodeSystem.m_pAllocations; - if (allocs.find(mem) != allocs.end()) - { - allocs.erase(mem); - return OR_CONTINUE; // done - } - - LOG_WARNING(thread, "[2004] used with pointer to unknown or already freed memory in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); - return OR_CONTINUE; - } } @@ -2682,6 +2141,52 @@ extern "C" *outNeedsTerminator = target.needTerminator; } + DWORD WINAPI CLEO_PeekIntOpcodeParam(CLEO::CRunningScript* thread) + { + // store state + auto param = opcodeParams[0]; + auto ip = thread->CurrentIP; + auto count = GetInstance().OpcodeSystem.handledParamCount; + + GetScriptParams(thread, 1); + DWORD result = opcodeParams[0].dwParam; + + // restore state + thread->CurrentIP = ip; + GetInstance().OpcodeSystem.handledParamCount = count; + opcodeParams[0] = param; + + return result; + } + + float WINAPI CLEO_PeekFloatOpcodeParam(CLEO::CRunningScript* thread) + { + // store state + auto param = opcodeParams[0]; + auto ip = thread->CurrentIP; + auto count = GetInstance().OpcodeSystem.handledParamCount; + + GetScriptParams(thread, 1); + float result = opcodeParams[0].fParam; + + // restore state + thread->CurrentIP = ip; + GetInstance().OpcodeSystem.handledParamCount = count; + opcodeParams[0] = param; + + return result; + } + + SCRIPT_VAR* WINAPI CLEO_GetOpcodeParamsArray() + { + return opcodeParams; + } + + 
DWORD WINAPI CLEO_GetParamsHandledCount() + { + return GetInstance().OpcodeSystem.handledParamCount; + } + void WINAPI CLEO_WriteStringOpcodeParam(CLEO::CRunningScript* thread, const char* str) { if(!WriteStringParam(thread, str)) @@ -2758,6 +2263,8 @@ extern "C" break; } } + + GetInstance().OpcodeSystem.handledParamCount += count; } void WINAPI CLEO_SkipUnusedVarArgs(CLEO::CRunningScript* thread) @@ -2843,7 +2350,17 @@ extern "C" CLEO::CRunningScript* WINAPI CLEO_GetLastCreatedCustomScript() { - return lastScriptCreated; + return GetInstance().ScriptEngine.LastScriptCreated; + } + + CLEO::CRunningScript* WINAPI CLEO_GetScriptByName(const char* threadName, BOOL standardScripts, BOOL customScripts, DWORD resultIndex) + { + return GetInstance().ScriptEngine.FindScriptNamed(threadName, standardScripts, customScripts, resultIndex); + } + + CLEO::CRunningScript* WINAPI CLEO_GetScriptByFilename(const char* path, DWORD resultIndex) + { + return GetInstance().ScriptEngine.FindScriptByFilename(path, resultIndex); } void WINAPI CLEO_AddScriptDeleteDelegate(FuncScriptDeleteDelegateT func) diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index 7885d4a9..ebf98472 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -20,6 +20,7 @@ namespace CLEO static const size_t LastOriginalOpcode = 0x0A4E; // GTA SA static const size_t LastCustomOpcode = 0x7FFF; + static std::set ProtectedOpcodes; // these can not be overwritten // most recently processed static CRunningScript* lastScript; @@ -28,6 +29,7 @@ namespace CLEO static WORD lastCustomOpcode; static std::string lastErrorMsg; static WORD prevOpcode; // previous + static BYTE handledParamCount; // read/writen since current opcode handling started void FinalizeScriptObjects(); @@ -47,12 +49,6 @@ namespace CLEO private: friend OpcodeResult __stdcall opcode_0AA2(CRunningScript *pScript); friend OpcodeResult __stdcall opcode_0AA3(CRunningScript *pScript); - friend OpcodeResult __stdcall 
opcode_0AC8(CRunningScript *pScript); - friend OpcodeResult __stdcall opcode_0AC9(CRunningScript *pScript); - friend OpcodeResult __stdcall opcode_2004(CRunningScript* pScript); - - std::set m_hNativeLibs; - std::set m_pAllocations; typedef OpcodeResult(__thiscall* _OpcodeHandler)(CRunningScript* thread, WORD opcode); @@ -92,6 +88,4 @@ namespace CLEO inline CRunningScript& operator<<(CRunningScript& thread, int nval); inline CRunningScript& operator>>(CRunningScript& thread, float& fval); inline CRunningScript& operator<<(CRunningScript& thread, float fval); - inline CRunningScript& operator>>(CRunningScript& thread, CVector& vec); - inline CRunningScript& operator<<(CRunningScript& thread, const CVector& vec); } diff --git a/source/CDebug.h b/source/CDebug.h index 52a06d7d..89dd1ae2 100644 --- a/source/CDebug.h +++ b/source/CDebug.h @@ -1,10 +1,6 @@ #pragma once #include -#define TRACE(format,...) {Debug.Trace(CLEO::eLogLevel::Default, format, __VA_ARGS__);} -#define LOG_WARNING(script, format, ...) {Debug.Trace(script, CLEO::eLogLevel::Error, format, __VA_ARGS__);} -#define SHOW_ERROR(a,...) 
{Debug.Error(a, __VA_ARGS__);} - std::string stringPrintf(const char* format, ...); namespace CLEO diff --git a/source/CPluginSystem.h b/source/CPluginSystem.h index 8a071eb4..e807953f 100644 --- a/source/CPluginSystem.h +++ b/source/CPluginSystem.h @@ -17,60 +17,39 @@ namespace CLEO CPluginSystem() { std::set loaded; - - TRACE("Loading plugins..."); - - auto path = FS::path(Filepath_Cleo).append("cleo_plugins").string(); - FilesWalk(path.c_str(), ".cleo", [&](const char* fullPath, const char* filename) - { - std::string name = filename; - std::transform(name.begin(), name.end(), name.begin(), [](unsigned char c) { return std::tolower(c); }); - - if(loaded.find(name) == loaded.end()) - { - TRACE("Loading plugin '%s'", fullPath); - HMODULE hlib = LoadLibrary(fullPath); - if (!hlib) - { - LOG_WARNING(0, "Error loading plugin '%s'", fullPath); - } - else - { - loaded.insert(name); - plugins.push_back(hlib); - } - } - else + auto LoadPluginsDir = [&](std::string path, std::string extension) + { + FilesWalk(path.c_str(), extension.c_str(), [&](const char* fullPath, const char* filename) { - LOG_WARNING(0, "Plugin `%s` already loaded. 
Skipping '%s'", name.c_str(), fullPath); - } - }); + std::string name = filename; + name.resize(name.length() - extension.length()); // cut off file type + std::transform(name.begin(), name.end(), name.begin(), [](unsigned char c) { return std::tolower(c); }); - // load plugins from legacy location - FilesWalk(Filepath_Cleo.c_str(), ".cleo", [&](const char* fullPath, const char* filename) - { - std::string name = filename; - std::transform(name.begin(), name.end(), name.begin(), [](unsigned char c) { return std::tolower(c); }); - - if(loaded.find(name) == loaded.end()) - { - TRACE("Loading plugin '%s'", fullPath); - HMODULE hlib = LoadLibrary(fullPath); - if (!hlib) + if (loaded.find(name) == loaded.end()) { - LOG_WARNING(0, "Error while loading plugin '%s'", fullPath); + TRACE("Loading plugin '%s'", fullPath); + HMODULE hlib = LoadLibrary(fullPath); + if (!hlib) + { + LOG_WARNING(0, "Error loading plugin '%s'", fullPath); + } + else + { + loaded.insert(name); + plugins.push_back(hlib); + } } else { - loaded.insert(name); - plugins.push_back(hlib); + LOG_WARNING(0, "Plugin `%s` already loaded. Skipping '%s'", name.c_str(), fullPath); } - } - else - { - LOG_WARNING(0, "Plugin `%s` already loaded. 
Skipping '%s'", name.c_str(), fullPath); - } - }); + }); + }; + + TRACE("Loading plugins..."); + LoadPluginsDir(FS::path(Filepath_Cleo).append("cleo_plugins").string(), ".cleo5"); // CLEO5 plugins + LoadPluginsDir(FS::path(Filepath_Cleo).append("cleo_plugins").string(), ".cleo"); // legacy plugins + LoadPluginsDir(Filepath_Cleo.c_str(), ".cleo"); // legacy plugins location } ~CPluginSystem() diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 01aba42c..14bffd73 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -83,6 +83,8 @@ namespace CLEO push count call FUNC_GetScriptParams } + + GetInstance().OpcodeSystem.handledParamCount += count; } void __fastcall _TransmitScriptParams(CRunningScript *pScript, int dummy, CRunningScript *pScriptB) @@ -103,6 +105,8 @@ namespace CLEO push count call FUNC_SetScriptParams } + + GetInstance().OpcodeSystem.handledParamCount += count; } void __fastcall _SetScriptCondResult(CRunningScript *pScript, int dummy, int val) @@ -165,6 +169,29 @@ namespace CLEO return CLEO::eCLEO_Version::CLEO_VER_CUR; } + LPCSTR WINAPI CLEO_GetScriptFilename(const CRunningScript* thread) + { + if (!GetInstance().ScriptEngine.IsValidScriptPtr(thread)) + { + return nullptr; + } + + auto cs = (CCustomScript*)thread; + return cs->GetScriptFileName(); + } + + LPCSTR WINAPI CLEO_GetScriptWorkDir(const CRunningScript* thread) + { + auto cs = (CCustomScript*)thread; + return cs->GetWorkDir(); + } + + void WINAPI CLEO_SetScriptWorkDir(CRunningScript* thread, const char* path) + { + auto cs = (CCustomScript*)thread; + cs->SetWorkDir(path); + } + SCRIPT_VAR *opcodeParams; SCRIPT_VAR *missionLocals; CRunningScript *staticThreads; @@ -182,7 +209,6 @@ namespace CLEO BYTE *scriptTexts; CRunningScript **inactiveThreadQueue, **activeThreadQueue; - CCustomScript *lastScriptCreated = nullptr; extern "C" void __stdcall opcode_004E(CCustomScript *pScript) @@ -792,11 +818,6 @@ namespace CLEO inj.InjectFunction(&opcode_004E_hook, 
gvm.TranslateMemoryAddress(MA_OPCODE_004E)); } - CScriptEngine::CScriptEngine() - { - CustomMission = nullptr; - } - CScriptEngine::~CScriptEngine() { GameEnd(); @@ -1029,27 +1050,95 @@ namespace CLEO } } - CRunningScript *CScriptEngine::FindScriptNamed(const char *name) + CRunningScript* CScriptEngine::FindScriptNamed(const char* threadName, bool standardScripts, bool customScripts, size_t resultIndex) { - for (auto script = *activeThreadQueue; script; script = script->GetNext()) + if (standardScripts) + { + for (auto script = *activeThreadQueue; script; script = script->GetNext()) + { + if (_strnicmp(threadName, script->Name, sizeof(script->Name)) == 0) + { + if (resultIndex == 0) return script; + else resultIndex--; + } + } + } + + if (customScripts) { - if (_stricmp(name, script->GetName().c_str()) == 0) - return script; + if (CustomMission) + { + if (_strnicmp(threadName, CustomMission->Name, sizeof(CustomMission->Name)) == 0) + { + if (resultIndex == 0) return CustomMission; + else resultIndex--; + } + } + + for (auto it = CustomScripts.begin(); it != CustomScripts.end(); ++it) + { + auto cs = *it; + if (_strnicmp(threadName, cs->Name, sizeof(cs->Name)) == 0) + { + if (resultIndex == 0) return cs; + else resultIndex--; + } + } } + return nullptr; } - CCustomScript *CScriptEngine::FindCustomScriptNamed(const char *name) + + CRunningScript* CScriptEngine::FindScriptByFilename(const char* path, size_t resultIndex) { - if (CustomMission) + if (path == nullptr) return nullptr; + + auto pathLen = strlen(path); + auto CheckScript = [&](CRunningScript* script) + { + if (script == nullptr) return false; + + auto cs = (CCustomScript*)script; + std::string scriptPath = cs->GetScriptFileDir(); + scriptPath += '\\'; + scriptPath += cs->GetScriptFileName(); + + if (scriptPath.length() < pathLen) return false; + + auto startPos = scriptPath.length() - pathLen; + if (_strnicmp(path, scriptPath.c_str() + startPos, pathLen) == 0) + { + if (startPos > 0 && path[startPos - 1] != 
'\\') return false; // whole file/dir name must match + + return true; + } + }; + + // standard scripts + for (auto script = *activeThreadQueue; script; script = script->GetNext()) + { + if (CheckScript(script)) + { + if (resultIndex == 0) return script; + else resultIndex--; + } + } + + // custom scripts + if (CheckScript(CustomMission)) { - if (_stricmp(name, CustomMission->GetName().c_str()) == 0) return CustomMission; + if (resultIndex == 0) return CustomMission; + else resultIndex--; } for (auto it = CustomScripts.begin(); it != CustomScripts.end(); ++it) { auto cs = *it; - if (_stricmp(name, cs->GetName().c_str()) == 0) - return cs; + if (CheckScript(cs)) + { + if (resultIndex == 0) return cs; + else resultIndex--; + } } return nullptr; @@ -1348,7 +1437,7 @@ namespace CLEO memcpy(Name, fName.c_str(), len); } } - lastScriptCreated = this; + GetInstance().ScriptEngine.LastScriptCreated = this; bOK = true; } catch (std::exception& e) @@ -1365,7 +1454,8 @@ namespace CLEO { if (BaseIP && !bIsMission) delete[] BaseIP; RunScriptDeleteDelegate(reinterpret_cast(this)); - if (lastScriptCreated == this) lastScriptCreated = nullptr; + + if (GetInstance().ScriptEngine.LastScriptCreated == this) GetInstance().ScriptEngine.LastScriptCreated = nullptr; } float VectorSqrMagnitude(CVector vector) { return vector.x * vector.x + vector.y * vector.y + vector.z * vector.z; } diff --git a/source/CScriptEngine.h b/source/CScriptEngine.h index dcf18703..1507a9aa 100644 --- a/source/CScriptEngine.h +++ b/source/CScriptEngine.h @@ -97,16 +97,18 @@ namespace CLEO class CScriptEngine : VInjectible { + public: bool gameInProgress = false; friend class CCustomScript; std::list CustomScripts; std::list ScriptsWaitingForDelete; std::set InactiveScriptHashes; - CCustomScript *CustomMission; + CCustomScript *CustomMission = nullptr; + CCustomScript *LastScriptCreated = nullptr; CCustomScript *LoadScript(const char *szFilePath); - public: + bool NativeScriptsDebugMode; // debug mode enabled? 
std::string MainScriptFileDir; std::string MainScriptFileName; @@ -114,7 +116,7 @@ namespace CLEO static SCRIPT_VAR CleoVariables[0x400]; - CScriptEngine(); + CScriptEngine() = default; ~CScriptEngine(); virtual void Inject(CCodeInjector&); @@ -128,8 +130,8 @@ namespace CLEO void LoadState(int saveSlot); void SaveState(); - CRunningScript* FindScriptNamed(const char *); - CCustomScript* FindCustomScriptNamed(const char*); + CRunningScript* FindScriptNamed(const char* threadName, bool standardScripts, bool customScripts, size_t resultIndex = 0); // can be called multiple times to find more scripts named threadName. resultIndex should be incremented until the method returns nullptr + CRunningScript* FindScriptByFilename(const char* path, size_t resultIndex = 0); // if path is not absolute it will be resolved with cleo directory as root bool IsValidScriptPtr(const CRunningScript*) const; // leads to active script? (regular or custom) void AddCustomScript(CCustomScript*); void RemoveCustomScript(CCustomScript*); @@ -162,13 +164,10 @@ namespace CLEO } extern "C" { - extern SCRIPT_VAR *opcodeParams; - extern SCRIPT_VAR *missionLocals; extern CRunningScript *staticThreads; } extern BYTE *scmBlock, *missionBlock; - extern CCustomScript *lastScriptCreated; extern float VectorSqrMagnitude(CVector vector); } diff --git a/source/cleo.def b/source/cleo.def index 3174d3ab..2c85e6de 100644 --- a/source/cleo.def +++ b/source/cleo.def @@ -38,3 +38,12 @@ EXPORTS _CLEO_SetScriptDebugMode@8 @35 _CLEO_Log@8 @36 _CLEO_ReadStringParamWriteBuffer@16 @37 + _CLEO_GetOpcodeParamsArray@0 @38 + _CLEO_GetParamsHandledCount@0 @39 + _CLEO_PeekIntOpcodeParam@4 @40 + _CLEO_PeekFloatOpcodeParam@4 @41 + _CLEO_GetScriptByName@16 @42 + _CLEO_GetScriptByFilename@8 @43 + _CLEO_GetScriptFilename@4 @44 + _CLEO_GetScriptWorkDir@4 @45 + _CLEO_SetScriptWorkDir@8 @46 \ No newline at end of file diff --git a/source/stdafx.h b/source/stdafx.h index 734c39af..8a52a3a0 100644 --- a/source/stdafx.h +++ 
b/source/stdafx.h @@ -47,18 +47,11 @@ static const std::string Filepath_Log = FS::path(Filepath_Cleo).append(".cleo.lo #include #include "..\cleo_sdk\CLEO.h" +#include "..\cleo_sdk\CLEO_Utils.h" #include "CTheScripts.h" -#define CPOOL_USE_HANDLE_ACCESS - -//#define VALIDATE_SIZE(struc, size) static_assert(sizeof(struc) == size, #struc " (Invalid Structure Size)") - #define NUM_SCAN_ENTITIES 16 -using CPedPool = CPool; -using CVehiclePool = CPool; -using CObjectPool = CPool; -using CBuildingPool = CPool; using RGBA = CRGBA; using RwV3D = RwV3d; struct RwRect2D; diff --git a/tests/FilesystemOperations/0A99.s b/tests/FilesystemOperations/0A99.s new file mode 100644 index 0000000000000000000000000000000000000000..44f9c7396a2b3697eca5c2fe3d755a1edd865a73 GIT binary patch literal 952 zcmbW0ze~eF6vy8+Nrw#i2XweniV7-ew|0?WTM>Uk?IM9v)58X8TJFxFOHls>C!Gan z{{|ObRs0X!y1Ak0-D#F?JLU2tkes!sEV?Y1Ya@qzaI$L1F{rF5vF8^r(xJtY#DW8CrP5OQ90(YW=h_{#~N&!wFi5;s)f+qf<`*8_+ fMvnt1#>3QODJx4?+9qnVu}83@_y0W1MQ#2C2A&IA literal 0 HcmV?d00001 diff --git a/tests/FilesystemOperations/0A99.txt b/tests/FilesystemOperations/0A99.txt new file mode 100644 index 00000000..ddea5a7d --- /dev/null +++ b/tests/FilesystemOperations/0A99.txt @@ -0,0 +1,109 @@ +{$CLEO .s} +{$USE debug} +{$USE file} +{$USE bitwise} +var 0@ : Integer +var 1@ : Integer +var 2@ : Integer +var 3@ : Integer +var 4@ : Integer +var 5@ : Integer +var 6@ : Integer +var 7@ : Integer +var 8@ : Integer +var 9@ : Integer +var 10@ : Integer + +script_name "0A99" // set_current_directory +debug_on + +trace "0A99 (set_current_directory)" + + +wait 0 +// set current script location as work dir +0A99: set_current_directory ".\\" // tested opcode + +0@ = allocate_memory 260 +string_format 0@ {format} "" +resolve_filepath 0@ {store_to} 0@ + +cleo_call @TEXT_LENGTH {args} 1 0@ {result} 1@ +if + cleo_call @TEXT_ENDS_WITH {args} 2 0@ "\cleo\cleo_tests\FilesystemOperations" +then + trace "~g~~h~~h~0A99 (set_current_directory), #0 PASSED" +else + breakpoint 
"~r~~h~~h~~h~0A99 (set_current_directory), #0 FAILED! Current script dir: %s" 0@ +end +free_memory 0@ + + +wait 0 +// set virtual absolute path +0A99: set_current_directory "cleo:" // tested opcode + +0@ = allocate_memory 260 +string_format 0@ {format} "" +resolve_filepath 0@ {store_to} 0@ + +cleo_call @TEXT_LENGTH {args} 1 0@ {result} 1@ +if + cleo_call @TEXT_ENDS_WITH {args} 2 0@ "\cleo" +then + trace "~g~~h~~h~0A99 (set_current_directory), #1 PASSED" +else + breakpoint "~r~~h~~h~~h~0A99 (set_current_directory), #1 FAILED! Current script dir: %s" 0@ +end +free_memory 0@ + + +wait 0 +// predefined number +0A99: set_current_directory 1 // tested opcode + +0@ = allocate_memory 260 +string_format 0@ {format} "" +resolve_filepath 0@ {store_to} 0@ + +cleo_call @TEXT_LENGTH {args} 1 0@ {result} 1@ +if + cleo_call @TEXT_ENDS_WITH {args} 2 0@ "\GTA San Andreas User Files" +then + trace "~g~~h~~h~0A99 (set_current_directory), #2 PASSED" +else + breakpoint "~r~~h~~h~~h~0A99 (set_current_directory), #2 FAILED! 
Current script dir: %s" 0@ +end +free_memory 0@ + + +terminate_this_custom_script + + +:TEXT_LENGTH + call_function_return 0x00826330 args 1 pop 1 0@ result 5@ +cleo_return_with true 5@ + + +:TEXT_ENDS_WITH + cleo_call @TEXT_LENGTH {args} 1 0@ {result} 5@ + cleo_call @TEXT_LENGTH {args} 1 1@ {result} 6@ + if + 5@ < 6@ + then + cleo_return_with false + end + + 5@ -= 6@ + 0@ += 5@ + + call_function_return 0x0081E568 num_params 2 pop 0 0@ 1@ result 5@ // int lstrcmpiA(char* str, char* str) + if + 5@ == 0 + then + 5@ = true + else + 5@ = false + end +cleo_return_with 5@ + diff --git a/tests/FilesystemOperations/0A9A.s b/tests/FilesystemOperations/0A9A.s new file mode 100644 index 0000000000000000000000000000000000000000..9e4f0e55ca874bc54e6dcfb0506d34f1535e7891 GIT binary patch literal 359 zcmZ3&%*SHjXz6&Efl-l90K`(z$S+9Ei%-kUN!4UvWME;q2E?x}E2^u@fFg9mbQF{g6x75ssAq^2n3 z<>x7+R%8~JWagzSfULAYH^8PPJTMv ziFf}IHq=l7YUpe(pud1FnGMtg^p`CGe;LC4<(3HaM2bR5z5*x|!Cum1n9KzLd6911 literal 0 HcmV?d00001 diff --git a/tests/FilesystemOperations/0A9A.txt b/tests/FilesystemOperations/0A9A.txt new file mode 100644 index 00000000..1db554b7 --- /dev/null +++ b/tests/FilesystemOperations/0A9A.txt @@ -0,0 +1,48 @@ +{$CLEO .s} +{$USE debug} +{$USE file} +{$USE bitwise} +var 0@ : Integer +var 1@ : Integer +var 2@ : Integer +var 3@ : Integer +var 4@ : Integer +var 5@ : Integer +var 6@ : Integer +var 7@ : Integer +var 8@ : Integer +var 9@ : Integer +var 10@ : Integer + +script_name "0A9A" // open_file +debug_on + +trace "0A9A (open_file)" + + +wait 0 +// try open non existing file +if + // test 0A9A + 0@ = open_file "cleo\not_a_file.txt" {mode} "r" // tested opcode +then + breakpoint "~r~~h~~h~~h~0A9A (open_file), #0 FAILED! Opened non existing file?" 
+else + trace "~g~~h~~h~0A9A (open_file), #0 PASSED" +end + + +wait 0 +// try open non existing file +if + // test 0A9A + 0@ = open_file "cleo\.cleo_test.log" {mode} "r" // tested opcode +then + trace "~g~~h~~h~0A9A (open_file), #1 PASSED" + close_file 0@ +else + breakpoint "~r~~h~~h~~h~0A9A (open_file), #1 FAILED! Failed to open file." +end + + +terminate_this_custom_script diff --git a/tests/FilesystemOperations/0A9B.s b/tests/FilesystemOperations/0A9B.s new file mode 100644 index 0000000000000000000000000000000000000000..d9917f8681a2bf217f3df80e983bcf725919e7b0 GIT binary patch literal 458 zcmZ3&%*SHjXz6sAfl-l95X4f@NY2SGPK{5?%t_T`U}RunxCX?txcG#Vb5iqT^gv{M zNosM4UQT{GA5)PwGXsM!17p+w|NntTsMe*|)n$MXhFLla$_5Gnj={mME(}ZzjP^hc z42pbqbwyAOIQ6(WdiuD!C@Q!m0-cniP?E2ZUyz!o0CJ!n*nzXTfNmgYxgpGQqW!e) d9}#{sf*A|+Q#McoF@7>qppu`AVU|zk0svh%hyMTo literal 0 HcmV?d00001 diff --git a/tests/FilesystemOperations/0A9B.txt b/tests/FilesystemOperations/0A9B.txt new file mode 100644 index 00000000..5c9d9427 --- /dev/null +++ b/tests/FilesystemOperations/0A9B.txt @@ -0,0 +1,57 @@ +{$CLEO .s} +{$USE debug} +{$USE file} +{$USE bitwise} +var 0@ : Integer +var 1@ : Integer +var 2@ : Integer +var 3@ : Integer +var 4@ : Integer +var 5@ : Integer +var 6@ : Integer +var 7@ : Integer +var 8@ : Integer +var 9@ : Integer +var 10@ : Integer + +script_name "0A9B" // close_file +debug_on + +trace "0A9B (close_file)" + + +wait 0 +// open file +if + 0@ = open_file "cleo\.cleo_test.log" {mode} "r+" +then + trace "~g~~h~~h~0A9B (close_file), #0 PASSED" +else + breakpoint "~r~~h~~h~~h~0A9B (close_file), #0 FAILED! Failed to open file." +end + + +wait 0 +// close file +0A9B: close_file 0@ // tested opcode +trace "~g~~h~~h~0A9B (close_file), #1 PASSED" + + +wait 0 +// open file again +if + 0@ = open_file "cleo\.cleo_test.log" {mode} "r+" +then + trace "~g~~h~~h~0A9B (close_file), #2 PASSED" +else + breakpoint "~r~~h~~h~~h~0A9B (close_file), #2 FAILED! Failed to open file." 
+end + + +wait 0 +// close file again +0A9B: close_file 0@ // tested opcode +trace "~g~~h~~h~0A9B (close_file), #3 PASSED" + + +terminate_this_custom_script diff --git a/tests/FilesystemOperations/0A9C.s b/tests/FilesystemOperations/0A9C.s new file mode 100644 index 0000000000000000000000000000000000000000..260fef28cc492369de6ad699eda61316e95d8026 GIT binary patch literal 390 zcmZ3&%*SHjXz6^Ifl-l91juq!&`3`$iBHSSNsTYgtV-2nU}RunxCX?txcE4db5is5 z5{onW7>k%07m5$K{6g_3-Q{DRax1(5snz)qXP1#}5BBiM 0 +then + trace "~g~~h~~h~0A9C (get_file_size), #1 PASSED" +else + breakpoint "~r~~h~~h~~h~0A9C (get_file_size), #1 FAILED! File size: %d" 1@ +end + + +close_file 0@ +trace "~g~~h~~h~0A9C (get_file_size), #2 PASSED" + + +terminate_this_custom_script diff --git a/tests/FilesystemOperations/0A9D.s b/tests/FilesystemOperations/0A9D.s new file mode 100644 index 0000000000000000000000000000000000000000..2ab2c20f4fb3cbc056da527443dd254fdd9ed86c GIT binary patch literal 1246 zcmZ3&%*SHjXz6m8fl-l96vR@{C`wICiBBua&y7#Z%t_T`U}RunxCX?txcE4db5is5 z5{onW7>k%07;b^)8-)@A z+x`+60!Bz7U<1@dYzP?P3IS6`6Bn{WU^Xa{iAsmYFn8lk*6aTe84Pgafaxv+sEOEQ zZ43(ra+5VMfwKV9GbD90ae$Jz0=5*69>G_C6X`P(SU>@N77f%yYy_LYe1@LH=@h}H JFn3Sp0sz5gbTj|} literal 0 HcmV?d00001 diff --git a/tests/FilesystemOperations/0A9D.txt b/tests/FilesystemOperations/0A9D.txt new file mode 100644 index 00000000..f4dd5141 --- /dev/null +++ b/tests/FilesystemOperations/0A9D.txt @@ -0,0 +1,114 @@ +{$CLEO .s} +{$USE debug} +{$USE file} +{$USE bitwise} +var 0@ : Integer +var 1@ : Integer +var 2@ : Integer +var 3@ : Integer +var 4@ : Integer +var 5@ : Integer +var 6@ : Integer +var 7@ : Integer +var 8@ : Integer +var 9@ : Integer +var 10@ : Integer + + +script_name "0A9D" // read_from_file +debug_on + +trace "0A9D (read_from_file)" + + +wait 0 +// open the file +if + 0@ = open_file "cleo.asi" {mode} "r" +then + trace "~g~~h~~h~0A9D (read_from_file), #0 PASSED" +else + breakpoint "~r~~h~~h~~h~0A9D (read_from_file), #0 FAILED! Failed to open file." 
+end + + +wait 0 +// read 0 bytes +1@ = 0xcccccccc +2@ = 0xdddddddd +3@ = 0xeeeeeeee + +0A9D: read_from_file 0@ {size} 0 {destination} 2@ // tested opcode + +if and + 1@ == 0xcccccccc + 2@ == 0x00000000 + 3@ == 0xeeeeeeee +then + trace "~g~~h~~h~0A9D (read_from_file), #1 PASSED" +else + breakpoint "~r~~h~~h~~h~0A9D (read_from_file), #1 FAILED!~n~cccccccc 00000000 eeeeeeee Expected~n~%08x %08x %08x Occured" 1@ 2@ 3@ +end + + +wait 0 +// read 2 bytes +1@ = 0xcccccccc +2@ = 0xdddddddd +3@ = 0xeeeeeeee + +0A9D: read_from_file 0@ {size} 2 {destination} 2@ // tested opcode + +if and + 1@ == 0xcccccccc + 2@ == 0x00005A4D // DOS "MZ" header + 3@ == 0xeeeeeeee +then + trace "~g~~h~~h~0A9D (read_from_file), #2 PASSED" +else + breakpoint "~r~~h~~h~~h~0A9D (read_from_file), #2 FAILED!~n~cccccccc 00005A4D eeeeeeee Expected~n~%08x %08x %08x Occured" 1@ 2@ 3@ +end + + +wait 0 +close_file 0@ +trace "~g~~h~~h~0A9D (read_from_file), #3 PASSED" + + +wait 0 +// open the file +if + 0@ = open_file "cleo.asi" {mode} "r" +then + trace "~g~~h~~h~0A9D (read_from_file), #3 PASSED" +else + breakpoint "~r~~h~~h~~h~0A9D (read_from_file), #3 FAILED! Failed to open file." 
+end + + +wait 0 +// read 2 bytes into array +1@ = 0xcccccccc +2@ = 0xdddddddd +3@ = 0xeeeeeeee +4@ = 2 // array index + +0A9D: read_from_file 0@ {size} 2 {destination} 0@(4@,32i) // tested opcode + +if and + 1@ == 0xcccccccc + 2@ == 0x00005A4D // DOS "MZ" header + 3@ == 0xeeeeeeee +then + trace "~g~~h~~h~0A9D (read_from_file), #4 PASSED" +else + breakpoint "~r~~h~~h~~h~0A9D (read_from_file), #4 FAILED!~n~cccccccc 00005A4D eeeeeeee Expected~n~%08x %08x %08x Occured" 1@ 2@ 3@ +end + + +wait 0 +close_file 0@ +trace "~g~~h~~h~0A9D (read_from_file), #5 PASSED" + + +terminate_this_custom_script diff --git a/tests/MemoryOperations/0A8C.s b/tests/MemoryOperations/0A8C.s new file mode 100644 index 0000000000000000000000000000000000000000..750df0f1490d6b1626d8a39019069b26344dda0b GIT binary patch literal 1910 zcmciCJ4?e*6ae6Jlc>-kAZ|{#iZAGpS4-_61aAdJQRoj4NrP6bN@%ez`32n^+{M`+ z;40|iELGqg!^VRAhkfc=!qeNUQt4#~+D#YQ4*b!u)It7f!(gZ|t$9jETOW$Za@ADjED1&Og>XxC%0gpCPU=n;$CPf-h<4z#eW zL@cB@3-gzSfsF}R7!ix>4^azakcDX`WFf~{FxER4v)Gt`g&DE9dKa}Y`&(G8NMtUQ zI1Ah)J!fPR8xydwA{G~Kq83&^3tcyi#O7io&Vn)4wqUqxad)cNn1Dq#VsZZ3vZxH- zo!O2>cRdzpXNP8Jdps;F!zTofjqTLxI|qfx_6+yv|5xXj7zJY?g^Ya(P~;*M%@wYz)kd z42)-h0LWruV7vSBx%4h}$H5e^3qaz6^|I|Ns97nxs{iURRd^LfDPeQBXEe2yhGzc6DK3Vqi1{ m>S9pjGp;Lw>cX$l&C%1x)kU!`uTC`uXg4zx12fP$leqvIoHYml literal 0 HcmV?d00001 diff --git a/tests/MemoryOperations/0A96.txt b/tests/MemoryOperations/0A96.txt new file mode 100644 index 00000000..c6260775 --- /dev/null +++ b/tests/MemoryOperations/0A96.txt @@ -0,0 +1,34 @@ +{$CLEO .s} +{$USE debug} +{$USE memory} +var 0@ : Integer +var 1@ : Integer +var 2@ : Integer +var 3@ : Integer +var 4@ : Integer +var 5@ : Integer +var 6@ : Integer +var 7@ : Integer +var 8@ : Integer +var 9@ : Integer +var 10@ : Integer + +script_name "0A96" // get_ped_pointer +debug_on + +trace "0A96 (get_ped_pointer)" + +wait 0 +get_player_char 0 {handle} 0@ +1@ = 0 +0A96: get_ped_pointer 0@ {address} 1@ + +if + 1@ > 0x10000 // possibly valid pointer +then + trace 
"~g~~h~~h~0A96 (get_ped_pointer), #0 PASSED" +else + breakpoint "~r~~h~~h~~h~0A96 (get_ped_pointer), #0 FAILED!~n~%d" 1@ 2@ 3@ +end + +terminate_this_custom_script diff --git a/tests/MemoryOperations/0A97.s b/tests/MemoryOperations/0A97.s new file mode 100644 index 0000000000000000000000000000000000000000..18033d4f5f6cdd3abef66f6c10b2bdf7b6d4e969 GIT binary patch literal 240 zcmZ3&%*SHjXlZ_!fl-l93dB;-NKY+^FH6nHOwLJ-FUZf#D@iTVWME`qVQ^<+oxs@5 zyp#b*u`vJvf?x)5m_drBa{<{v)-@no62xF+03u%o#_<3D{{t=2t4pt|%K#zVw&^G+ z8z=-g1_!&kFfcJN3ITO8DDs)t6+v|p*6il!>Er66SeI9)ngVnKGZOnY0Le%@ Ai~s-t literal 0 HcmV?d00001 diff --git a/tests/MemoryOperations/0A97.txt b/tests/MemoryOperations/0A97.txt new file mode 100644 index 00000000..9b8d1098 --- /dev/null +++ b/tests/MemoryOperations/0A97.txt @@ -0,0 +1,41 @@ +{$CLEO .s} +{$USE debug} +{$USE memory} +var 0@ : Integer +var 1@ : Integer +var 2@ : Integer +var 3@ : Integer +var 4@ : Integer +var 5@ : Integer +var 6@ : Integer +var 7@ : Integer +var 8@ : Integer +var 9@ : Integer +var 10@ : Integer + +script_name "0A97" // get_vehicle_pointer +debug_on + +trace "0A97 (get_vehicle_pointer)" + + +wait 0 +request_model 400 +load_all_models_now +create_car 400 {xyz} 0.0 0.0 0.0 {result} 0@ + +1@ = 0 +0A97: get_vehicle_pointer 0@ {result} 1@ // tested opcode + +mark_car_as_no_longer_needed 0@ + +if + 1@ > 0x10000 // possibly valid pointer +then + trace "~g~~h~~h~0A97 (get_vehicle_pointer), #0 PASSED" +else + breakpoint "~r~~h~~h~~h~0A97 (get_vehicle_pointer), #0 FAILED!~n~%d" 1@ 2@ 3@ +end + + +terminate_this_custom_script diff --git a/tests/MemoryOperations/0A98.s b/tests/MemoryOperations/0A98.s new file mode 100644 index 0000000000000000000000000000000000000000..e6ca9510aea852bea0475d21095a1b62b02689f9 GIT binary patch literal 242 zcmZ3&%*SHjXlZenfl-l962wx_NKY+^&rixqO)iNq$j{6xNiEW3U}RunaA#umW$b2V zX9QAg3_yS&m_Zz7kfIq}KsJzdgb_$z0}4ojc#I4{2-A(AcWIK z9R+0rg#gFkU{@CgCI&_UphgBoKC`+as78W%-5foATwN6F^6FGmfG%KWVqgZkXfhW7 D%9T0u literal 0 HcmV?d00001 diff 
--git a/tests/MemoryOperations/0A98.txt b/tests/MemoryOperations/0A98.txt new file mode 100644 index 00000000..2d8c264f --- /dev/null +++ b/tests/MemoryOperations/0A98.txt @@ -0,0 +1,41 @@ +{$CLEO .s} +{$USE debug} +{$USE memory} +var 0@ : Integer +var 1@ : Integer +var 2@ : Integer +var 3@ : Integer +var 4@ : Integer +var 5@ : Integer +var 6@ : Integer +var 7@ : Integer +var 8@ : Integer +var 9@ : Integer +var 10@ : Integer + +script_name "0A98" // get_vehicle_pointer +debug_on + +trace "0A98 (get_object_pointer)" + + +wait 0 +request_model 333 // golf club +load_all_models_now +create_object 333 {xyz} 0.0 0.0 0.0 {result} 0@ + +1@ = 0 +0A98: get_object_pointer 0@ {result} 1@ // tested opcode + +mark_object_as_no_longer_needed 0@ + +if + 1@ > 0x10000 // possibly valid pointer +then + trace "~g~~h~~h~0A98 (get_object_pointer), #0 PASSED" +else + breakpoint "~r~~h~~h~~h~0A98 (get_object_pointer), #0 FAILED!~n~%d" 1@ 2@ 3@ +end + + +terminate_this_custom_script diff --git a/tests/MemoryOperations/0AC6.s b/tests/MemoryOperations/0AC6.s new file mode 100644 index 0000000000000000000000000000000000000000..0995cd97d1add166a62f03f679195ca883f890e2 GIT binary patch literal 291 zcmZ3&%*SHj=xlbFfl-l90>o0#NKY+^&q+*5&519_&&(@HEz)FQWME-nV_*hR$G8}O z{rmqP$OZ~9F)*G1f?h6=6blPT;2Hx9qa;WvBLfgwg4MVfD+&5CFoysC{~u_LPF;Fk zT?Po@vQI}r*+3z{F*w-Og#l>AAE12 0x10000 // possibly valid pointer + 2@ == 0x11223344 +then + trace "~g~~h~~h~0AC6 (get_label_pointer), #0 PASSED" +else + breakpoint "~r~~h~~h~~h~0AC6 (get_label_pointer), #0 FAILED!~n~11223344 Expected~n~%08x Occured" 2@ +end + + +terminate_this_custom_script + +:DATA +hex + 44 33 22 11 + "some longer testing text" 00 +end diff --git a/tests/MemoryOperations/0AC7.s b/tests/MemoryOperations/0AC7.s new file mode 100644 index 0000000000000000000000000000000000000000..620d950cbdd82db9386a42c0779a17576b57adda GIT binary patch literal 257 zcmZ3&%*SHj=xlzNfl-l948&5x7#Pn0K`$4O%fi9}61c{|!YB!nW&|Niuo{pqUk1jg|Ns93ZPBVrudB-d 
zA?()aC@32!1ULoA@tSf@*!mrWI(bLD(MX@fg&d|`v$k^D#M8UP9 dAT_xpH3cZ9YG6^J;GdjaT9ldsbS%SUE&z)`LxcbT literal 0 HcmV?d00001 diff --git a/tests/MemoryOperations/0AC7.txt b/tests/MemoryOperations/0AC7.txt new file mode 100644 index 00000000..64c9b2f8 --- /dev/null +++ b/tests/MemoryOperations/0AC7.txt @@ -0,0 +1,39 @@ +{$CLEO .s} +{$USE debug} +{$USE memory} +var 0@ : Integer +var 1@ : Integer +var 2@ : Integer +var 3@ : Integer +var 4@ : Integer +var 5@ : Integer +var 6@ : Integer +var 7@ : Integer +var 8@ : Integer +var 9@ : Integer +var 10@ : Integer + +script_name "0AC7" // get_vehicle_pointer +debug_on + +trace "0AC7 (get_var_pointer)" + + +wait 0 +1@ = 0x11223344 +0AC7: get_var_pointer 1@ {result} 0@ // tested opcode + +2@ = 0xCCCCCCCC +read_memory 0@ {size} 4 {vp} false {result} 2@ + +if and + 0@ > 0x10000 // possibly valid pointer + 2@ == 0x11223344 +then + trace "~g~~h~~h~0AC7 (get_var_pointer), #0 PASSED" +else + breakpoint "~r~~h~~h~~h~0AC7 (get_var_pointer), #0 FAILED!~n~11223344 Expected~n~%08x Occured" 2@ +end + + +terminate_this_custom_script diff --git a/tests/MemoryOperations/0AC8.s b/tests/MemoryOperations/0AC8.s new file mode 100644 index 0000000000000000000000000000000000000000..89f67acb46d38c6134569feb363bad7faaf7ed6c GIT binary patch literal 421 zcmZ3&%*SHj=xlMAfl-l948&5v3D8(`AD42*^U|NjRXr&X6;SC;`o*v-{ZP&QBqa10K1bzxv) zV3YvrVo>A@tSf@*!mrWI(bLD(MX@fg&d|`v$k^D#M8UP9AT_xpH3cZ9YG6^J;Gdja zT9lf?%mi{F*b6{w&HzC#7tpmVEG!H_DWE?XEI|ww2B6co{Ug@thDc721?nQ&>4pjj NryD>4KBrIS0syfFcLD$a literal 0 HcmV?d00001 diff --git a/tests/MemoryOperations/0AC8.txt b/tests/MemoryOperations/0AC8.txt new file mode 100644 index 00000000..86569a8f --- /dev/null +++ b/tests/MemoryOperations/0AC8.txt @@ -0,0 +1,50 @@ +{$CLEO .s} +{$USE debug} +{$USE memory} +var 0@ : Integer +var 1@ : Integer +var 2@ : Integer +var 3@ : Integer +var 4@ : Integer +var 5@ : Integer +var 6@ : Integer +var 7@ : Integer +var 8@ : Integer +var 9@ : Integer +var 10@ : Integer + +script_name "0AC8" // 
allocate_memory +debug_on + +trace "0AC8 (allocate_memory)" + + +wait 0 +0@ = 0x11223344 +0AC8: allocate_memory {size} 4 {result} 0@ // tested opcode + +if and + 0@ > 0x10000 // possibly valid pointer + 0@ <> 0x11223344 +then + trace "~g~~h~~h~0AC8 (allocate_memory), #0 PASSED" +else + breakpoint "~r~~h~~h~~h~0AC8 (allocate_memory), #0 FAILED!~n~11223344 Expected~n~%08x Occured" 2@ +end + + +wait 0 +// in CLEO5 expect to alloceted memory be prefilled with zeros +2@ = 0xCCCCCCCC +read_memory 0@ {size} 4 {vp} false {result} 2@ + +if + 2@ == 0 +then + trace "~g~~h~~h~0AC8 (allocate_memory), #1 PASSED" +else + breakpoint "~r~~h~~h~~h~0AC8 (allocate_memory), #1 FAILED!~n~00000000 Expected~n~%08x Occured" 2@ +end + + +terminate_this_custom_script diff --git a/tests/MemoryOperations/0AC9.s b/tests/MemoryOperations/0AC9.s new file mode 100644 index 0000000000000000000000000000000000000000..71d15d339a30ad513106f68f82d13f99286ed9ef GIT binary patch literal 225 zcmZ3&%*SHj=xTYGfl-l90K`(zD9A5}Ps_rVPs(h@)%xnF*7q<1FCjr05aGZ7_^*x85r~a|Njp(SEVk!t}X+F z&@IqWP&QBqa10K1bzxuvIsvGjL6Oh3t_Z3gmliiiPajto#k#yYkh>IID+*GROHxyS VeAP4s|K#M-qSO?iI~XQ&0RS7~I_LlZ literal 0 HcmV?d00001 diff --git a/tests/MemoryOperations/0AC9.txt b/tests/MemoryOperations/0AC9.txt new file mode 100644 index 00000000..e6e31a90 --- /dev/null +++ b/tests/MemoryOperations/0AC9.txt @@ -0,0 +1,32 @@ +{$CLEO .s} +{$USE debug} +{$USE memory} +var 0@ : Integer +var 1@ : Integer +var 2@ : Integer +var 3@ : Integer +var 4@ : Integer +var 5@ : Integer +var 6@ : Integer +var 7@ : Integer +var 8@ : Integer +var 9@ : Integer +var 10@ : Integer + +script_name "0AC9" // free_memory +debug_on + +trace "0AC9 (free_memory)" + + +wait 0 +0@ = 0x11223344 +allocate_memory {size} 4 {result} 0@ + +0AC9: free_memory 0@ // tested opcode + +// not much to check within script. 
Did not crashed the game or printed error, so perhaps ok +trace "~g~~h~~h~0AC9 (free_memory), #0 PASSED" + + +terminate_this_custom_script diff --git a/tests/MemoryOperations/0AE9.txt b/tests/MemoryOperations/0AE9.txt new file mode 100644 index 00000000..a5223108 --- /dev/null +++ b/tests/MemoryOperations/0AE9.txt @@ -0,0 +1,38 @@ +{$CLEO .s} +{$USE debug} +{$USE memory} +var 0@ : Integer +var 1@ : Integer +var 2@ : Integer +var 3@ : Integer +var 4@ : Integer +var 5@ : Integer +var 6@ : Integer +var 7@ : Integer +var 8@ : Integer +var 9@ : Integer +var 10@ : Integer + +script_name "0AE9" // pop_float +debug_on + +trace "0AE9 (pop_float)" + + +wait 0 +0@s = '42.5' +get_var_pointer 0@ {result} 2@ +call_function 0x0823CEE {argCount} 1 {pop} 1 {arg} 2@ // double atof(const char *) + +0AE9: pop_float {result} 3@ // tested opcode + +if + 3@ == 42.5 +then + trace "~g~~h~~h~0AE9 (pop_float), #0 PASSED" +else + breakpoint "~r~~h~~h~~h~0AE9 (pop_float), #0 FAILED!~n~42.5 Expected~n~%f Occured" 3@ +end + + +terminate_this_custom_script diff --git a/tests/MemoryOperations/0AEA.s b/tests/MemoryOperations/0AEA.s new file mode 100644 index 0000000000000000000000000000000000000000..ca34b8d6e8532dfcdb438235efc830449d5e1216 GIT binary patch literal 222 zcmZ3&%*SHj=<0Zwfl-l92*gs*NKY+^FGx*^FG@|*WME`qVff0(!obYHz{bD~;!WcM zvVp8uTtI@EiQyVh%o-#QWcxBOR{a0}A83|ZU3y(z1_)sqs-vK6pb+2~9PH}Cz{J3) z0@T5v$mdyC1l56ClbfTbkE@GfU0$84fklOaYehk7a!G0mjPIYETw0Wx0&xq&WG(;; CVmea* literal 0 HcmV?d00001 diff --git a/tests/MemoryOperations/0AEA.txt b/tests/MemoryOperations/0AEA.txt new file mode 100644 index 00000000..951463b8 --- /dev/null +++ b/tests/MemoryOperations/0AEA.txt @@ -0,0 +1,37 @@ +{$CLEO .s} +{$USE debug} +{$USE memory} +var 0@ : Integer +var 1@ : Integer +var 2@ : Integer +var 3@ : Integer +var 4@ : Integer +var 5@ : Integer +var 6@ : Integer +var 7@ : Integer +var 8@ : Integer +var 9@ : Integer +var 10@ : Integer + +script_name "0AEA" // get_ped_ref +debug_on + +trace "0AEA (get_ped_ref)" + + 
+wait 0 +get_player_char 0 {handle} 0@ +get_ped_pointer 0@ {address} 1@ + +0AEA: get_ped_ref 1@ {result} 2@ // tested opcode + +if + 0@ == 2@ +then + trace "~g~~h~~h~0AEA (get_ped_ref), #0 PASSED" +else + breakpoint "~r~~h~~h~~h~0AEA (get_ped_ref), #0 FAILED!~n~%08x Expected~n~%08x Occured" 0@ 2@ +end + + +terminate_this_custom_script diff --git a/tests/MemoryOperations/0AEB.s b/tests/MemoryOperations/0AEB.s new file mode 100644 index 0000000000000000000000000000000000000000..8687de188bc38cc418bf275c43b56e029dd4a3d3 GIT binary patch literal 255 zcmZ3&%*SHj=<0Nsfl-l948&5C&fa{&ovCWga|K>jtLHfsiu1QUZV17r06|NnuOXw{|H)n$MX z&^AX*+jJC^4HN<#gM(dN7?>CsnSr_(6#0DXilDmiYjkt;^l^1jtjnuYHL$2qaIGjv XO)g1If${y5lS_+IQy^|)n9KzLYIQ-A literal 0 HcmV?d00001 diff --git a/tests/MemoryOperations/0AEB.txt b/tests/MemoryOperations/0AEB.txt new file mode 100644 index 00000000..0c804131 --- /dev/null +++ b/tests/MemoryOperations/0AEB.txt @@ -0,0 +1,41 @@ +{$CLEO .s} +{$USE debug} +{$USE memory} +var 0@ : Integer +var 1@ : Integer +var 2@ : Integer +var 3@ : Integer +var 4@ : Integer +var 5@ : Integer +var 6@ : Integer +var 7@ : Integer +var 8@ : Integer +var 9@ : Integer +var 10@ : Integer + +script_name "0AEB" // get_vehicle_ref +debug_on + +trace "0AEB (get_vehicle_ref)" + + +wait 0 +request_model 400 +load_all_models_now +create_car 400 {xyz} 0.0 0.0 0.0 {result} 0@ +get_vehicle_pointer 0@ {result} 1@ + +0AEB: get_vehicle_ref 1@ {result} 2@ // tested opcode + +mark_car_as_no_longer_needed 0@ + +if + 0@ == 2@ +then + trace "~g~~h~~h~0AEA (get_vehicle_ref), #0 PASSED" +else + breakpoint "~r~~h~~h~~h~0AEA (get_vehicle_ref), #0 FAILED!~n~%08x Expected~n~%08x Occured" 0@ 2@ +end + + +terminate_this_custom_script diff --git a/tests/MemoryOperations/0AEC.txt b/tests/MemoryOperations/0AEC.txt new file mode 100644 index 00000000..4b60dac2 --- /dev/null +++ b/tests/MemoryOperations/0AEC.txt @@ -0,0 +1,41 @@ +{$CLEO .s} +{$USE debug} +{$USE memory} +var 0@ : Integer +var 1@ : Integer +var 2@ : Integer 
+var 3@ : Integer +var 4@ : Integer +var 5@ : Integer +var 6@ : Integer +var 7@ : Integer +var 8@ : Integer +var 9@ : Integer +var 10@ : Integer + +script_name "0AEC" // get_object_ref +debug_on + +trace "0AEC (get_object_ref)" + + +wait 0 +request_model 333 // golf club +load_all_models_now +create_object 333 {xyz} 0.0 0.0 0.0 {result} 0@ +get_object_pointer 0@ {result} 1@ + +0AEC: get_object_ref 1@ {result} 2@ // tested opcode + +mark_object_as_no_longer_needed 0@ + +if + 0@ == 2@ +then + trace "~g~~h~~h~0AEC (get_object_ref), #0 PASSED" +else + breakpoint "~r~~h~~h~~h~0AEC (get_object_ref), #0 FAILED!~n~%08x Expected~n~%08x Occured" 0@ 2@ +end + + +terminate_this_custom_script diff --git a/tests/MemoryOperations/2400.s b/tests/MemoryOperations/2400.s new file mode 100644 index 0000000000000000000000000000000000000000..b4b806e78e290029541247ab977123857a493b71 GIT binary patch literal 2443 zcmd7UJ4?e*6bJBgo7)N?n{f7IsM>N3E?5>{38(v0xJ8n51wGlrHx2 z*zr7eJdYjEW5@HWK$!3QxW;L3+w-cy^?kAPY(i%1rAno20WR?H&G#XfIq;f1@q1{g zw7gQba^8+-^J0pks+y+j%;Cih!!!$pVv)JLSh?MB?OoTw+>~5wv!8Bjw!PPM9TW=? zX|Orm)qlGZQCAD!OP}k-tB9)-a$UfLkgLKXxT16nT@%8t+85Wxi-@Zla=m#LbXCP& zKfd4?x=P}%`Tp;99F+#o0WIXZ|0L+DiMsO15JOiQ!8O}A2TmVFT=kIa3?_u;fG+0R zo47G_O-6U^;<<-3z)JL%BLmjLxl*AzZuNz;zUr2G0RA-HU?%! 
z2F5c$0Aw*SFx~|MAd8uS@f{E_ssNR+u&{unt}(DMS;AEVSxi8KfD}}zF9W0h|NsAi zma5gI*VSc!5T@-q3d#lw0gl1Jt}YBf+iv{(|DQpTFSD))sspzsH%Ct&R~N;)yt-s4 zP%wZ3g;Xd|aIGjvO)g1I0jg3pu&7W#5dO)@rA4VJK+gi*4RkYE!N7GZ{7+s8&P&0-Z&|ic@!Wbze_Wvd_B#g0#gpmBrNe4mZs2i zZh^Ckm79ry-x&%RfWQxkfmI3%a+SitA_7!~TBWQ5>nzUCO;yOr&x7d%CLgFyU=8yY zSZNVXK4wVCrwpirs2au$TL`JcT%(Ssa4m*ZGl&`n=rf?3876ael&6IP3j;xzDe0+2 Q3MHwYmJH+>B~77=q9{PDf);uNMVkNdM~nNT#r-#c ztT~Q@ACA~1+unDt9vRHi$vE9!%I9-gz!(pojsxM~foGS4S6VCvS|OQ^GdkO*cCr^8=t*-3c- literal 0 HcmV?d00001 diff --git a/tests/MemoryOperations/2402.txt b/tests/MemoryOperations/2402.txt new file mode 100644 index 00000000..47bc39cb --- /dev/null +++ b/tests/MemoryOperations/2402.txt @@ -0,0 +1,156 @@ +{$CLEO .s} +{$USE debug} +{$USE memory} +var 0@ : Integer +var 1@ : Integer +var 2@ : Integer +var 3@ : Integer +var 4@ : Integer +var 5@ : Integer +var 6@ : Integer +var 7@ : Integer +var 8@ : Integer +var 9@ : Integer +var 10@ : Integer + +script_name "2402" // write_struct +debug_on + + +trace "2402 (write_struct)" + +wait 0 +// write 0 bytes +get_var_pointer 1@ {store_to} 0@ +1@ = 0xcccccccc +2@ = 0xdddddddd +3@ = 0xeeeeeeee + +2402: write_struct 0@ {offset} 4 {size} 0 {value} 0x12345678 // tested opcode +if and + 1@ == 0xcccccccc + 2@ == 0xdddddddd + 3@ == 0xeeeeeeee +then + trace "~g~~h~~h~2402 (write_struct), #0 PASSED" +else + breakpoint "~r~~h~~h~~h~2402 (write_struct), #0 FAILED!~n~cccccccc dddddddd eeeeeeee Expected~n~%08x %08x %08x Occured" 1@ 2@ 3@ +end + + +wait 0 +// write 1 byte +get_var_pointer 1@ {store_to} 0@ +1@ = 0xcccccccc +2@ = 0xdddddddd +3@ = 0xeeeeeeee + +2402: write_struct 0@ {offset} 4 {size} 1 {value} 0x12345678 // tested opcode +if and + 1@ == 0xcccccccc + 2@ == 0xdddddd78 + 3@ == 0xeeeeeeee +then + trace "~g~~h~~h~2402 (write_struct), #1 PASSED" +else + breakpoint "~r~~h~~h~~h~2402 (write_struct), #1 FAILED!~n~cccccccc dddddd78 eeeeeeee Expected~n~%08x %08x %08x Occured" 1@ 2@ 3@ +end + + +wait 0 +// write 2 bytes +get_var_pointer 1@ {store_to} 
0@ +1@ = 0xcccccccc +2@ = 0xdddddddd +3@ = 0xeeeeeeee + +2402: write_struct 0@ {offset} 4 {size} 2 {value} 0x12345678 // tested opcode +if and + 1@ == 0xcccccccc + 2@ == 0xdddd5678 + 3@ == 0xeeeeeeee +then + trace "~g~~h~~h~2402 (write_struct), #2 PASSED" +else + breakpoint "~r~~h~~h~~h~2402 (write_struct), #2 FAILED!~n~cccccccc dddd5678 eeeeeeee Expected~n~%08x %08x %08x Occured" 1@ 2@ 3@ +end + + +wait 0 +// write 3 bytes +get_var_pointer 1@ {store_to} 0@ +1@ = 0xcccccccc +2@ = 0xdddddddd +3@ = 0xeeeeeeee + +2402: write_struct 0@ {offset} 4 {size} 3 {value} 0x12345678 // tested opcode +if and + 1@ == 0xcccccccc + 2@ == 0xdd345678 + 3@ == 0xeeeeeeee +then + trace "~g~~h~~h~2402 (write_struct), #3 PASSED" +else + breakpoint "~r~~h~~h~~h~2402 (write_struct), #3 FAILED!~n~cccccccc dd345678 eeeeeeee Expected~n~%08x %08x %08x Occured" 1@ 2@ 3@ +end + + +wait 0 +// write 4 bytes +get_var_pointer 1@ {store_to} 0@ +1@ = 0xcccccccc +2@ = 0xdddddddd +3@ = 0xeeeeeeee + +2402: write_struct 0@ {offset} 4 {size} 4 {value} 0x12345678 // tested opcode +if and + 1@ == 0xcccccccc + 2@ == 0x12345678 + 3@ == 0xeeeeeeee +then + trace "~g~~h~~h~2402 (write_struct), #4 PASSED" +else + breakpoint "~r~~h~~h~~h~2402 (write_struct), #4 FAILED!~n~cccccccc 12345678 eeeeeeee Expected~n~%08x %08x %08x Occured" 1@ 2@ 3@ +end + + +wait 0 +// try write 5 bytes +get_var_pointer 1@ {store_to} 0@ +1@ = 0xcccccccc +2@ = 0xdddddddd +3@ = 0xeeeeeeee + +2402: write_struct 0@ {offset} 4 {size} 5 {value} 0x12345678 // tested opcode +if and + 1@ == 0xcccccccc + 2@ == 0x12345678 + 3@ == 0xeeeeeeee +then + trace "~g~~h~~h~2402 (write_struct), #5 PASSED (expected warning)" +else + breakpoint "~r~~h~~h~~h~2402 (write_struct), #5 FAILED!~n~cccccccc 12345678 eeeeeeee Expected~n~%08x %08x %08x Occured" 1@ 2@ 3@ +end + + +wait 0 +// write string +get_var_pointer 1@ {store_to} 0@ +1@ = 0xcccccccc +2@s = 'CCCCCCCC' +4@ = 0xeeeeeeee + + +2402: write_struct 0@ {offset} 5 {size} 2 {value} "some longer text" // tested 
opcode +if and + 1@ == 0xcccccccc + 2@s == 'CsoCCCCC' // trimmed by target value capacity + 4@ == 0xeeeeeeee +then + trace "~g~~h~~h~2402 (write_struct), #6 PASSED" +else + breakpoint "~r~~h~~h~~h~2402 (write_struct), #6 FAILED!~n~cccccccc 'CsoCCCCC' eeeeeeee Expected~n~%08x %s %08x Occured" 1@ 2@s 4@ +end + + +terminate_this_custom_script diff --git a/tests/MemoryOperations/2403.s b/tests/MemoryOperations/2403.s new file mode 100644 index 0000000000000000000000000000000000000000..6c99814fc2adf79c629e12d49b357dab1be80b8e GIT binary patch literal 108 zcmZ3&%*SG6Vqkojfl-l91jJI%NXsuuPc4bhP0h_Os?=m)WME-nV_;@rV01B75*_K<2%BCV1!V(;0LS29R~LrKTmS)L8Cw7V literal 0 HcmV?d00001 diff --git a/tests/MemoryOperations/2403.txt b/tests/MemoryOperations/2403.txt new file mode 100644 index 00000000..c712d809 --- /dev/null +++ b/tests/MemoryOperations/2403.txt @@ -0,0 +1,32 @@ +{$CLEO .s} +{$USE debug} +{$USE memory} +var 0@ : Integer +var 1@ : Integer +var 2@ : Integer +var 3@ : Integer +var 4@ : Integer +var 5@ : Integer +var 6@ : Integer +var 7@ : Integer +var 8@ : Integer +var 9@ : Integer +var 10@ : Integer + +script_name "2403" // forget_memory +debug_on + +trace "2403 (forget_memory)" + + +wait 0 +0@ = 0x11223344 +allocate_memory {size} 4 {result} 0@ + +2403: forget_memory 0@ // tested opcode + +// not much to check within script. Did not crashed the game or printed error, so perhaps ok +trace "~g~~h~~h~2403 (forget_memory), #0 PASSED" + + +terminate_this_custom_script diff --git a/tests/MemoryOperations/2404.s b/tests/MemoryOperations/2404.s new file mode 100644 index 0000000000000000000000000000000000000000..5e6e5eebd57ee495fcf3ca09b6a56e5a5da41ee1 GIT binary patch literal 255 zcmZ3&%*SG6VqkKZfl-l96~t1|NKY+^FHSDXEC5m^MWx9l@mZzCCGp8csfi`2DVhu{ zD$EQF^SPKA8Lly~FjzAHc|f)=17p_z|Nnu;Sk|T2)n$MXQRe9=C>tmQI0gs1x-c*? 
zFfs!TVNm3YsVjmSLXtUdj-EcQE{b(|b*csy6$-8u1*yqEN5lC3$;qWfsVP9`0v*jT GnF|0jxlk(r literal 0 HcmV?d00001 diff --git a/tests/MemoryOperations/2404.txt b/tests/MemoryOperations/2404.txt new file mode 100644 index 00000000..4c8af6a4 --- /dev/null +++ b/tests/MemoryOperations/2404.txt @@ -0,0 +1,35 @@ +{$CLEO .s} +{$USE debug} +{$USE memory} +var 0@ : Integer +var 1@ : Integer +var 2@ : Integer +var 3@ : Integer +var 4@ : Integer +var 5@ : Integer +var 6@ : Integer +var 7@ : Integer +var 8@ : Integer +var 9@ : Integer +var 10@ : Integer + +script_name "2404" // get_script_struct_just_created +debug_on + +trace "2404 (get_script_struct_just_created)" + +// no wait! +2404: get_script_struct_just_created 0@ // tested opcode + +get_this_script_struct 1@ + +if + 0@ == 1@ // this script is last created one +then + trace "~g~~h~~h~2404 (get_script_struct_just_created), #0 PASSED" +else + breakpoint "~r~~h~~h~~h~2404 (get_script_struct_just_created), #0 FAILED!~n~%08x Expected~n~%08x Occured" 1@ 0@ +end + + +terminate_this_custom_script diff --git a/tests/cleo_tests_runner.cs b/tests/cleo_tests_runner.cs new file mode 100644 index 0000000000000000000000000000000000000000..d6fd8a659f48f7a0f22f932a1c20b9f24d362a35 GIT binary patch literal 581 zcmZ3&%aEPa$J8Oq93kw4y1CZXwu#t;# z;s5{tS(x}(lYu&|__#n+dpozjEa0>b>(#+<1_Qp6>{@aN^?>b z)K!bsnHd=vPH?fZFfy|;T;>82eB7$VF)<*lrB}?%zyMOf#=y+Rzyegq;=s(#a1F?O z22u(X1af^D7{$U2v<;0QCbE9feSKK(#NpfRaF=ML;2-KZWXw>iqH*z~P{f zmS38eq6qaEC(LbHsNO=e4ApBV{^IpoFU+CL93Yo6SvCL}EP7yyRZs5;)Ym`Zrm#UA r0d)a95<#H}3@J3HAo=?W&;+0-&qAnVpkKlMJ_)h_NU$?(;$i>*p8Sgw literal 0 HcmV?d00001 diff --git a/tests/cleo_tests_runner.txt b/tests/cleo_tests_runner.txt new file mode 100644 index 00000000..404ff27b --- /dev/null +++ b/tests/cleo_tests_runner.txt @@ -0,0 +1,96 @@ +{$CLEO .cs} +{$USE debug} +{$USE memory} +{$USE file} + +script_name 'CleoTest' +debug_on + +print_big_formatted "CLEO TESTING" {time} 5000 {style} TextStyle.MiddleSmaller + +wait 5000 // wait for game to fade in 
+clear_prints + +cleo_call @RUN_TESTS_DIR {argCount} 2 {args} "cleo:" "cleo_tests" + +print_big_formatted "DONE" {time} 5000 {style} TextStyle.MiddleSmaller + +terminate_this_custom_script + + + +// arg 0 - base directory path +// arg 1 - directory name +:RUN_TESTS_DIR + trace "~w~Testing module '%s'" 1@ + + // process all test files + 5@ = allocate_memory 260 + string_format {buffer} 5@ {format} "%s\\%s\\*.s" 0@ 1@ + + 6@ = 0 // search handle + 7@ = allocate_memory 64 + + if + find_first_file 5@ {handle} 6@ {fileName} 7@ + then + while true + string_format {buffer} 5@ {format} "%s\\%s\\%s" 0@ 1@ 7@ + if + does_file_exist 5@ // files only + then + stream_custom_script 5@ + get_last_created_custom_script 11@ + + while is_script_running 11@ + wait 0 + end + end + + write_memory 7@ {size} 4 {value} 0 {vp} false + if + not find_next_file 6@ {fileName} 7@ + then + break + end + end + + find_close 6@ + else + trace "~r~No tests found!" + end + + // process all sub directories + string_format {buffer} 5@ {format} "%s\\%s\\*" 0@ 1@ + + 6@ = 0 // search handle + write_memory 7@ {size} 4 {value} 0 {vp} false + if + find_first_file 5@ {handle} 6@ {fileName} 7@ + then + while true + string_format {buffer} 5@ {format} "%s\\%s\\%s" 0@ 1@ 7@ + 8@ = read_memory 7@ {size} 4 {vp} false + if and + 8@ <> 0x2E // "." + 8@ <> 0x2E2E // ".." 
+ does_directory_exist 5@ // directories only + then + string_format {buffer} 5@ {format} "%s\\%s" 0@ 1@ + cleo_call @RUN_TESTS_DIR {argCount} 2 {args} 5@ 7@ + end + + write_memory 7@ {size} 4 {value} 0 {vp} false + if + not find_next_file 6@ {fileName} 7@ + then + break + end + end + + find_close 6@ + end + + free_memory 5@ + free_memory 7@ +cleo_return From d5106ee78dfe866422b4a090c10ac48fa115fd24 Mon Sep 17 00:00:00 2001 From: Seemann Date: Mon, 19 Feb 2024 13:03:48 -0500 Subject: [PATCH 086/216] copy .cleo5 plugins --- .github/workflows/main.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 2e6b9719..d501a961 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -52,6 +52,7 @@ jobs: copy third-party\bass\bass.dll .output\Release\bass.dll copy source\cleo_config.ini .output\Release\cleo\.cleo_config.ini copy cleo_plugins\.output\*.cleo .output\Release\cleo\cleo_plugins + copy cleo_plugins\.output\*.cleo5 .output\Release\cleo\cleo_plugins copy cleo_plugins\.output\*.ini .output\Release\cleo\cleo_plugins @REM install Silent's ASI Loader From d34aca529d4db48e6a7140ef1320063a0cffb043 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Thu, 22 Feb 2024 18:42:52 +0100 Subject: [PATCH 087/216] Opcodes 0A8E, 0A8F, 0A90, 0A91 moved to IntOperations plugin (#62) --- cleo_plugins/IntOperations/IntOperations.cpp | 53 ++++++++++++++++++++ source/CCustomOpcodeSystem.cpp | 44 ---------------- 2 files changed, 53 insertions(+), 44 deletions(-) diff --git a/cleo_plugins/IntOperations/IntOperations.cpp b/cleo_plugins/IntOperations/IntOperations.cpp index febd6a80..46758529 100644 --- a/cleo_plugins/IntOperations/IntOperations.cpp +++ b/cleo_plugins/IntOperations/IntOperations.cpp @@ -19,6 +19,11 @@ class IntOperations } //register opcodes + CLEO_RegisterOpcode(0x0A8E, opcode_0A8E); // x = a + b (int) + CLEO_RegisterOpcode(0x0A8F, opcode_0A8F); // x = a - b (int) + CLEO_RegisterOpcode(0x0A90, 
opcode_0A90); // x = a * b (int) + CLEO_RegisterOpcode(0x0A91, opcode_0A91); // x = a / b (int) + CLEO_RegisterOpcode(0x0B10, Script_IntOp_AND); CLEO_RegisterOpcode(0x0B11, Script_IntOp_OR); CLEO_RegisterOpcode(0x0B12, Script_IntOp_XOR); @@ -36,6 +41,54 @@ class IntOperations CLEO_RegisterOpcode(0x0B1E, Sign_Extend); } + //0A8E=3,%3d% = %1d% + %2d% ; int + static OpcodeResult WINAPI opcode_0A8E(CRunningScript* thread) + { + auto a = OPCODE_READ_PARAM_INT(); + auto b = OPCODE_READ_PARAM_INT(); + + auto result = a + b; + + OPCODE_WRITE_PARAM_INT(result); + return OR_CONTINUE; + } + + //0A8F=3,%3d% = %1d% - %2d% ; int + static OpcodeResult WINAPI opcode_0A8F(CRunningScript* thread) + { + auto a = OPCODE_READ_PARAM_INT(); + auto b = OPCODE_READ_PARAM_INT(); + + auto result = a - b; + + OPCODE_WRITE_PARAM_INT(result); + return OR_CONTINUE; + } + + //0A90=3,%3d% = %1d% * %2d% ; int + static OpcodeResult WINAPI opcode_0A90(CRunningScript* thread) + { + auto a = OPCODE_READ_PARAM_INT(); + auto b = OPCODE_READ_PARAM_INT(); + + auto result = a * b; + + OPCODE_WRITE_PARAM_INT(result); + return OR_CONTINUE; + } + + //0A91=3,%3d% = %1d% / %2d% ; int + static OpcodeResult WINAPI opcode_0A91(CRunningScript* thread) + { + auto a = OPCODE_READ_PARAM_INT(); + auto b = OPCODE_READ_PARAM_INT(); + + auto result = a / b; + + OPCODE_WRITE_PARAM_INT(result); + return OR_CONTINUE; + } + static OpcodeResult WINAPI Script_IntOp_AND(CScriptThread* thread) /**************************************************************** Opcode Format diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 441bbdc6..c50406f1 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -22,10 +22,6 @@ namespace CLEO template inline CRunningScript& operator<<(CRunningScript& thread, memory_pointer pval); template inline CRunningScript& operator>>(CRunningScript& thread, memory_pointer& pval); - OpcodeResult __stdcall opcode_0A8E(CRunningScript *thread); - 
OpcodeResult __stdcall opcode_0A8F(CRunningScript *thread); - OpcodeResult __stdcall opcode_0A90(CRunningScript *thread); - OpcodeResult __stdcall opcode_0A91(CRunningScript *thread); OpcodeResult __stdcall opcode_0A92(CRunningScript *thread); OpcodeResult __stdcall opcode_0A93(CRunningScript *thread); OpcodeResult __stdcall opcode_0A94(CRunningScript *thread); @@ -231,10 +227,6 @@ namespace CLEO CCustomOpcodeSystem::CCustomOpcodeSystem() { // register CLEO opcodes - CLEO_RegisterOpcode(0x0A8E, opcode_0A8E); - CLEO_RegisterOpcode(0x0A8F, opcode_0A8F); - CLEO_RegisterOpcode(0x0A90, opcode_0A90); - CLEO_RegisterOpcode(0x0A91, opcode_0A91); CLEO_RegisterOpcode(0x0A92, opcode_0A92); CLEO_RegisterOpcode(0x0A93, opcode_0A93); CLEO_RegisterOpcode(0x0A94, opcode_0A94); @@ -955,42 +947,6 @@ namespace CLEO /* Opcode definitions */ /************************************************************************/ - //0A8E=3,%3d% = %1d% + %2d% ; int - OpcodeResult __stdcall opcode_0A8E(CRunningScript *thread) - { - GetScriptParams(thread, 2); - opcodeParams[0].nParam += opcodeParams[1].nParam; - SetScriptParams(thread, 1); - return OR_CONTINUE; - } - - //0A8F=3,%3d% = %1d% - %2d% ; int - OpcodeResult __stdcall opcode_0A8F(CRunningScript *thread) - { - GetScriptParams(thread, 2); - opcodeParams[0].nParam -= opcodeParams[1].nParam; - SetScriptParams(thread, 1); - return OR_CONTINUE; - } - - //0A90=3,%3d% = %1d% * %2d% ; int - OpcodeResult __stdcall opcode_0A90(CRunningScript *thread) - { - GetScriptParams(thread, 2); - opcodeParams[0].nParam *= opcodeParams[1].nParam; - SetScriptParams(thread, 1); - return OR_CONTINUE; - } - - //0A91=3,%3d% = %1d% / %2d% ; int - OpcodeResult __stdcall opcode_0A91(CRunningScript *thread) - { - GetScriptParams(thread, 2); - opcodeParams[0].nParam /= opcodeParams[1].nParam; - SetScriptParams(thread, 1); - return OR_CONTINUE; - } - //0A92=-1,create_custom_thread %1d% OpcodeResult __stdcall opcode_0A92(CRunningScript *thread) { From 
2f1974082f34bea6fd15e32f6c0d7a341a9a8f7d Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sat, 24 Feb 2024 22:11:12 +0100 Subject: [PATCH 088/216] String args support for legacy opcodes (#61) * GetScriptStringParam rewriten inside CLEO. * Warning fixes. * GetScriptStringParam updates. * Fixes * Removed OPCODE_VALIDATE_STR_ARG_READ macro. * Updated usage of write string macro. * ScrLog hook disabling. * fixup! ScrLog hook disabling. * Performance updates. --- cleo_sdk/CLEO_Utils.h | 8 +- source/CCustomOpcodeSystem.cpp | 158 ++++++++++----------------------- source/CCustomOpcodeSystem.h | 4 - source/CScriptEngine.cpp | 112 ++++++++++++++++++++--- source/CScriptEngine.h | 3 +- 5 files changed, 154 insertions(+), 131 deletions(-) diff --git a/cleo_sdk/CLEO_Utils.h b/cleo_sdk/CLEO_Utils.h index 81123e1d..bb14ce6d 100644 --- a/cleo_sdk/CLEO_Utils.h +++ b/cleo_sdk/CLEO_Utils.h @@ -323,14 +323,14 @@ namespace CLEO if (str != nullptr && (size_t)str <= MinValidAddress) { - SHOW_ERROR("Invalid '0x%X' source pointer of output string argument #%d in script %s \nScript suspended.", str, CLEO_GetParamsHandledCount(), ScriptInfoStr(thread).c_str()); + SHOW_ERROR("Invalid '0x%X' source pointer of output string argument #%d in script %s \nScript suspended.", str, CLEO_GetParamsHandledCount() + 1, ScriptInfoStr(thread).c_str()); thread->Suspend(); return false; } if (!_paramWasString(true)) { - SHOW_ERROR("Output argument #%d expected to be variable string, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), ToKindStr(_lastParamType, _lastParamArrayType), ScriptInfoStr(thread).c_str()); + SHOW_ERROR("Output argument #%d expected to be variable string, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount() + 1, ToKindStr(_lastParamType, _lastParamArrayType), ScriptInfoStr(thread).c_str()); thread->Suspend(); return false; } @@ -341,7 +341,7 @@ namespace CLEO if ((size_t)ptr <= MinValidAddress) { - SHOW_ERROR("Invalid '0x%X' pointer of output string 
argument #%d in script %s \nScript suspended.", ptr, CLEO_GetParamsHandledCount(), ScriptInfoStr(thread).c_str()); + SHOW_ERROR("Invalid '0x%X' pointer of output string argument #%d in script %s \nScript suspended.", ptr, CLEO_GetParamsHandledCount() + 1, ScriptInfoStr(thread).c_str()); thread->Suspend(); return false; } @@ -466,6 +466,6 @@ namespace CLEO #define OPCODE_WRITE_PARAM_STRING(value) if(!_writeParamText(thread, value)) { return OpcodeResult::OR_INTERRUPT; } - #define OPCODE_WRITE_PARAM_PTR(value) _writeParamPtr(thread, value); \ + #define OPCODE_WRITE_PARAM_PTR(value) _writeParamPtr(thread, (void*)value); \ if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } } diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index c50406f1..de92cc6a 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -11,7 +11,6 @@ #include #include -#define OPCODE_VALIDATE_STR_ARG_READ(x) if((void*)x == nullptr) { SHOW_ERROR("%s in script %s \nScript suspended.", CCustomOpcodeSystem::lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return thread->Suspend(); } #define OPCODE_VALIDATE_STR_ARG_WRITE(x) if((void*)x == nullptr) { SHOW_ERROR("%s in script %s \nScript suspended.", CCustomOpcodeSystem::lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return thread->Suspend(); } #define OPCODE_READ_FORMATTED_STRING(thread, buf, bufSize, format) if(ReadFormattedString(thread, buf, bufSize, format) == -1) { SHOW_ERROR("%s in script %s \nScript suspended.", CCustomOpcodeSystem::lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return thread->Suspend(); } @@ -22,6 +21,7 @@ namespace CLEO template inline CRunningScript& operator<<(CRunningScript& thread, 
memory_pointer pval); template inline CRunningScript& operator>>(CRunningScript& thread, memory_pointer& pval); + OpcodeResult __stdcall opcode_0A92(CRunningScript *thread); OpcodeResult __stdcall opcode_0A93(CRunningScript *thread); OpcodeResult __stdcall opcode_0A94(CRunningScript *thread); @@ -435,77 +435,9 @@ namespace CLEO { static char internal_buf[MAX_STR_LEN]; if (!buf) { buf = internal_buf; bufSize = MAX_STR_LEN; } - const auto bufLength = bufSize ? bufSize - 1 : 0; // max text length (minus terminator char) - - CCustomOpcodeSystem::lastErrorMsg.clear(); - - auto paramType = CLEO_GetOperandType(thread); - if (IsImmInteger(paramType) || IsVariable(paramType)) // TODO: it is possible to differentiate between int/float arrays - { - GetScriptParams(thread, 1); - - if (opcodeParams[0].dwParam <= CCustomOpcodeSystem::MinValidAddress) - { - CCustomOpcodeSystem::lastErrorMsg = (opcodeParams[0].dwParam == 0) ? - "Reading string from 'null' pointer argument" : - stringPrintf("Reading string from invalid '0x%X' pointer argument", opcodeParams[0].dwParam); + int bufLength = (int)bufSize - 1; // max text length (minus terminator char), -1 for unknown - return nullptr; // error, target buffer untouched - } - - char* str = opcodeParams[0].pcParam; - auto length = strlen(str); - - if (length > bufLength) - { - CCustomOpcodeSystem::lastErrorMsg = stringPrintf("Target buffer too small (%d) to read whole string (%d) from argument", bufLength, length); - length = bufLength; // clamp to target buffer size - } - - if (length) strncpy(buf, str, length); - - if (bufSize > 0) buf[length] = '\0'; // string terminator - return buf; - } - else - if(IsImmString(paramType) || IsVarString(paramType)) - { - if (paramType == DT_VARLEN_STRING) - { - // prococess here as GetScriptStringParam can not obtain strings with lenght greater than 128 - thread->IncPtr(1); // already processed paramType - - DWORD length = (BYTE)*thread->GetBytePointer(); // as unsigned byte! 
- thread->IncPtr(1); // length info - - char* str = (char*)thread->GetBytePointer(); - thread->IncPtr(length); // text data - - if (length > bufLength) - { - CCustomOpcodeSystem::lastErrorMsg = stringPrintf("Target buffer too small (%d) to read whole string (%d) from argument", bufLength, length); - length = bufLength; // clamp to target buffer size - } - - if (length) strncpy(buf, str, length); - if (bufSize > 0) buf[length] = '\0'; // string terminator - } - else - { - size_t maxSize = 16 + 1; // long string and terminator - maxSize = min(maxSize, bufSize); - ZeroMemory(buf, maxSize); - - GetScriptStringParam(thread, buf, (BYTE)min(bufSize, 0xFF)); // standard game's function - } - - return buf; - } - - // unsupported param type - GetScriptParams(thread, 1); // skip unhandled param - CCustomOpcodeSystem::lastErrorMsg = stringPrintf("Reading string argument, got %s", ToKindStr(paramType)); - return nullptr; // error, target buffer untouched + return CLEO::GetScriptStringParam(thread, 0, buf, bufLength); } // write output\result string parameter @@ -853,7 +785,7 @@ namespace CLEO { argumentIsStr[i] = true; - auto str = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(str) + auto str = OPCODE_READ_PARAM_STRING(); stringParams.emplace_front(str); arg->pcParam = stringParams.front().data(); } @@ -891,13 +823,13 @@ namespace CLEO auto paramType = *(eDataType*)arg; if (IsVarString(paramType)) { - WriteStringParam(thread, arguments[i].pcParam); + OPCODE_WRITE_PARAM_STRING(arguments[i].pcParam); } else if (IsVariable(paramType)) { if (argumentIsStr[i]) // source was string, write it into provided buffer ptr { - auto ok = WriteStringParam(thread, arguments[i].pcParam); OPCODE_VALIDATE_STR_ARG_WRITE(ok) + OPCODE_WRITE_PARAM_STRING(arguments[i].pcParam); } else *thread << arguments[i].dwParam; @@ -950,7 +882,7 @@ namespace CLEO //0A92=-1,create_custom_thread %1d% OpcodeResult __stdcall opcode_0A92(CRunningScript *thread) { - auto path = ReadStringParam(thread); 
OPCODE_VALIDATE_STR_ARG_READ(path) + auto path = OPCODE_READ_PARAM_STRING(); auto filename = reinterpret_cast(thread)->ResolvePath(path, DIR_CLEO); // legacy: default search location is game\cleo directory TRACE("[0A92] Starting new custom script %s from thread named %s", filename.c_str(), thread->GetName().c_str()); @@ -990,7 +922,7 @@ namespace CLEO //0A94=-1,create_custom_mission %1d% OpcodeResult __stdcall opcode_0A94(CRunningScript *thread) { - auto path = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(path) + auto path = OPCODE_READ_PARAM_STRING(); auto filename = reinterpret_cast(thread)->ResolvePath(path, DIR_CLEO); // legacy: default search location is game\cleo directory filename += ".cm"; // add custom mission extension @@ -1056,7 +988,7 @@ namespace CLEO //0AAC=2, %2d% = load_audiostream %1d% // IF and SET OpcodeResult __stdcall opcode_0AAC(CRunningScript *thread) { - auto path = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(path) + auto path = OPCODE_READ_PARAM_STRING(); auto filename = reinterpret_cast(thread)->ResolvePath(path); auto stream = GetInstance().SoundSystem.LoadStream(filename.c_str()); @@ -1441,7 +1373,7 @@ namespace CLEO //0ABA=1,end_custom_thread_named %1d% OpcodeResult __stdcall opcode_0ABA(CRunningScript *thread) { - auto threadName = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(threadName) + auto threadName = OPCODE_READ_PARAM_STRING(); auto deleted_thread = (CCustomScript*)GetInstance().ScriptEngine.FindScriptNamed(threadName, false, true, 0); if (deleted_thread) @@ -1519,7 +1451,7 @@ namespace CLEO //0AC1=2,%2d% = load_audiostream_with_3d_support %1d% //IF and SET OpcodeResult __stdcall opcode_0AC1(CRunningScript *thread) { - auto path = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(path) + auto path = OPCODE_READ_PARAM_STRING(); auto stream = GetInstance().SoundSystem.LoadStream(path, true); *thread << stream; @@ -1579,7 +1511,7 @@ namespace CLEO //0ACA=1,show_text_box %1d% OpcodeResult __stdcall 
opcode_0ACA(CRunningScript *thread) { - auto text = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(text) + auto text = OPCODE_READ_PARAM_STRING(); PrintHelp(text); return OR_CONTINUE; } @@ -1587,9 +1519,9 @@ namespace CLEO //0ACB=3,show_styled_text %1d% time %2d% style %3d% OpcodeResult __stdcall opcode_0ACB(CRunningScript *thread) { - auto text = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(text) - DWORD time; *thread >> time; - DWORD style; *thread >> style; + auto text = OPCODE_READ_PARAM_STRING(); + auto time = OPCODE_READ_PARAM_INT(); + auto style = OPCODE_READ_PARAM_INT(); PrintBig(text, time, style); return OR_CONTINUE; @@ -1598,8 +1530,8 @@ namespace CLEO //0ACC=2,show_text_lowpriority %1d% time %2d% OpcodeResult __stdcall opcode_0ACC(CRunningScript *thread) { - auto text = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(text) - DWORD time; *thread >> time; + auto text = OPCODE_READ_PARAM_STRING(); + auto time = OPCODE_READ_PARAM_INT(); Print(text, time); return OR_CONTINUE; @@ -1608,8 +1540,8 @@ namespace CLEO //0ACD=2,show_text_highpriority %1d% time %2d% OpcodeResult __stdcall opcode_0ACD(CRunningScript *thread) { - auto text = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(text) - DWORD time; *thread >> time; + auto text = OPCODE_READ_PARAM_STRING(); + auto time = OPCODE_READ_PARAM_INT(); PrintNow(text, time); return OR_CONTINUE; @@ -1618,7 +1550,7 @@ namespace CLEO //0ACE=-1,show_formatted_text_box %1d% OpcodeResult __stdcall opcode_0ACE(CRunningScript *thread) { - auto format = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(format) + auto format = OPCODE_READ_PARAM_STRING(); char text[MAX_STR_LEN]; OPCODE_READ_FORMATTED_STRING(thread, text, sizeof(text), format) PrintHelp(text); @@ -1628,9 +1560,9 @@ namespace CLEO //0ACF=-1,show_formatted_styled_text %1d% time %2d% style %3d% OpcodeResult __stdcall opcode_0ACF(CRunningScript *thread) { - auto format = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(format) - 
DWORD time; *thread >> time; - DWORD style; *thread >> style; + auto format = OPCODE_READ_PARAM_STRING(); + auto time = OPCODE_READ_PARAM_INT(); + auto style = OPCODE_READ_PARAM_INT(); char text[MAX_STR_LEN]; OPCODE_READ_FORMATTED_STRING(thread, text, sizeof(text), format) PrintBig(text, time, style); @@ -1640,8 +1572,8 @@ namespace CLEO //0AD0=-1,show_formatted_text_lowpriority %1d% time %2d% OpcodeResult __stdcall opcode_0AD0(CRunningScript *thread) { - auto format = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(format) - DWORD time; *thread >> time; + auto format = OPCODE_READ_PARAM_STRING(); + auto time = OPCODE_READ_PARAM_INT(); char text[MAX_STR_LEN]; OPCODE_READ_FORMATTED_STRING(thread, text, sizeof(text), format) Print(text, time); @@ -1651,8 +1583,8 @@ namespace CLEO //0AD1=-1,show_formatted_text_highpriority %1d% time %2d% OpcodeResult __stdcall opcode_0AD1(CRunningScript *thread) { - auto format = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(format) - DWORD time; *thread >> time; + auto format = OPCODE_READ_PARAM_STRING(); + auto time = OPCODE_READ_PARAM_INT(); char text[MAX_STR_LEN]; OPCODE_READ_FORMATTED_STRING(thread, text, sizeof(text), format) PrintNow(text, time); @@ -1686,7 +1618,7 @@ namespace CLEO OpcodeResult __stdcall opcode_0AD3(CRunningScript *thread) { auto resultArg = GetStringParamWriteBuffer(thread); OPCODE_VALIDATE_STR_ARG_WRITE(resultArg.data) - auto format = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(format) + auto format = OPCODE_READ_PARAM_STRING(); char text[MAX_STR_LEN]; OPCODE_READ_FORMATTED_STRING(thread, text, sizeof(text), format) WriteStringParam(resultArg, text); @@ -1696,9 +1628,8 @@ namespace CLEO //0AD4=-1,%3d% = scan_string %1d% format %2d% //IF and SET OpcodeResult __stdcall opcode_0AD4(CRunningScript *thread) { - auto src = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(src) - char fmt[MAX_STR_LEN]; - auto format = ReadStringParam(thread, fmt, sizeof(fmt)); 
OPCODE_VALIDATE_STR_ARG_READ(format) + auto src = OPCODE_READ_PARAM_STRING(); + char format[MAX_STR_LEN]; OPCODE_READ_PARAM_STRING_BUFF(format, MAX_STR_LEN); auto resultType = thread->PeekDataType(); if (!IsVariable(resultType) && IsVarString(resultType)) @@ -1756,7 +1687,7 @@ namespace CLEO //0ADC=1, test_cheat %1d% OpcodeResult __stdcall opcode_0ADC(CRunningScript *thread) { - auto text = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(text) + auto text = OPCODE_READ_PARAM_STRING(); SetScriptCondResult(thread, TestCheat(text)); return OR_CONTINUE; } @@ -1782,33 +1713,36 @@ namespace CLEO //0ADE=2,%2d% = text_by_GXT_entry %1d% OpcodeResult __stdcall opcode_0ADE(CRunningScript *thread) { - auto gxt = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(gxt) + auto gxt = OPCODE_READ_PARAM_STRING(); - if (*thread->GetBytePointer() >= 1 && *thread->GetBytePointer() <= 8) - *thread << GetInstance().TextManager.Get(gxt); + auto txt = GetInstance().TextManager.Get(gxt); + + if (IsVarString(thread->PeekDataType())) + { + OPCODE_WRITE_PARAM_STRING(txt); + } else { - auto ok = WriteStringParam(thread, GetInstance().TextManager.Get(gxt)); OPCODE_VALIDATE_STR_ARG_WRITE(ok) + OPCODE_WRITE_PARAM_PTR(txt); // address of the text } - return OR_CONTINUE; } //0ADF=2,add_dynamic_GXT_entry %1d% text %2d% OpcodeResult __stdcall opcode_0ADF(CRunningScript *thread) { - char gxtLabel[8]; // 7 + terminator character - auto gxtOk = ReadStringParam(thread, gxtLabel, sizeof(gxtLabel)); OPCODE_VALIDATE_STR_ARG_READ(gxtOk) - auto text = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(text) + char gxtLabel[8] = { 0 }; // 7 + terminator character + auto gxt = OPCODE_READ_PARAM_STRING_BUFF(gxtLabel, 7); + auto txt = OPCODE_READ_PARAM_STRING(); - GetInstance().TextManager.AddFxt(gxtLabel, text); + GetInstance().TextManager.AddFxt(gxt, txt); return OR_CONTINUE; } //0AE0=1,remove_dynamic_GXT_entry %1d% OpcodeResult __stdcall opcode_0AE0(CRunningScript *thread) { - auto gxt = 
ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(gxt) + auto gxt = OPCODE_READ_PARAM_STRING(); GetInstance().TextManager.RemoveFxt(gxt); return OR_CONTINUE; @@ -1954,8 +1888,8 @@ namespace CLEO OpcodeResult __stdcall opcode_0AED(CRunningScript *thread) { // this opcode is useless now - float val; *thread >> val; - auto format = ReadStringParam(thread); OPCODE_VALIDATE_STR_ARG_READ(format) + auto val = OPCODE_READ_PARAM_FLOAT(); + auto format = OPCODE_READ_PARAM_STRING(); auto resultArg = GetStringParamWriteBuffer(thread); OPCODE_VALIDATE_STR_ARG_WRITE(resultArg.data) sprintf_s(resultArg.data, resultArg.size, format, val); diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index ebf98472..adc01de9 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -43,13 +43,9 @@ namespace CLEO static bool RegisterOpcode(WORD opcode, CustomOpcodeHandler callback); - static OpcodeResult CallFunctionGeneric(WORD opcode, CRunningScript* thread, bool thisCall, bool returnArg); static OpcodeResult CleoReturnGeneric(WORD opcode, CRunningScript* thread, bool returnArgs = false, DWORD returnArgCount = 0, bool strictArgCount = true); private: - friend OpcodeResult __stdcall opcode_0AA2(CRunningScript *pScript); - friend OpcodeResult __stdcall opcode_0AA3(CRunningScript *pScript); - typedef OpcodeResult(__thiscall* _OpcodeHandler)(CRunningScript* thread, WORD opcode); static const size_t OriginalOpcodeHandlersCount = (LastOriginalOpcode / 100) + 1; // 100 opcodes peer handler diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 14bffd73..5240deca 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -16,7 +16,6 @@ namespace CLEO DWORD FUNC_SetScriptParams; DWORD FUNC_SetScriptCondResult; DWORD FUNC_GetScriptParamPointer1; - DWORD FUNC_GetScriptStringParam; DWORD FUNC_GetScriptParamPointer2; void(__thiscall * AddScriptToQueue)(CRunningScript *, CRunningScript **queue); @@ -28,7 +27,6 @@ namespace CLEO 
void(__thiscall * SetScriptParams)(CRunningScript *, int count); void(__thiscall * SetScriptCondResult)(CRunningScript *, bool); SCRIPT_VAR * (__thiscall * GetScriptParamPointer1)(CRunningScript *); - void(__thiscall * GetScriptStringParam)(CRunningScript *, char* buf, BYTE len); SCRIPT_VAR * (__thiscall * GetScriptParamPointer2)(CRunningScript *, int __unused__); void RunScriptDeleteDelegate(CRunningScript *script); @@ -131,15 +129,102 @@ namespace CLEO return (SCRIPT_VAR*)((size_t)result + pScript->GetBasePointer()); } - void __fastcall _GetScriptStringParam(CRunningScript *pScript, int dummy, char *buf, int len) + char* __fastcall GetScriptStringParam(CRunningScript* thread, int dummy, char* buff, int buffLen) { - _asm + if (buff == nullptr || buffLen == 0) return buff; + + if (buffLen < 0) buffLen = 0x7FFFFFFF; // unknown - unlimited + + auto paramType = thread->PeekDataType(); + auto arrayType = IsArray(paramType) ? thread->PeekArrayDataType() : eArrayDataType::ADT_NONE; + auto isVariableInt = IsVariable(paramType) && (arrayType == eArrayDataType::ADT_NONE || arrayType == eArrayDataType::ADT_INT); + + // integer address to text buffer + if (IsImmInteger(paramType) || isVariableInt) { - mov ecx, pScript - push len - push buf - call FUNC_GetScriptStringParam + GetScriptParams(thread, 1); + + if (opcodeParams[0].dwParam <= CCustomOpcodeSystem::MinValidAddress) + { + LOG_WARNING(thread, "Invalid '0x%X' pointer of input string argument #%d in script %s", opcodeParams[0].dwParam, CLEO_GetParamsHandledCount(), ScriptInfoStr(thread).c_str()); + return nullptr; // error + } + + auto len = min((int)strlen(opcodeParams[0].pcParam), buffLen); + memcpy(buff, opcodeParams[0].pcParam, len); + if (len < buffLen) buff[len] = '\0'; // add terminator if possible + return buff; + } + else if (paramType == DT_VARLEN_STRING) + { + thread->IncPtr(1); // already processed paramType + + DWORD length = *thread->GetBytePointer(); // as unsigned byte! 
+ thread->IncPtr(1); // length info + + char* str = (char*)thread->GetBytePointer(); + thread->IncPtr(length); // text data + + memcpy(buff, str, min(buffLen, (int)length)); + if ((int)length < buffLen) buff[length] = '\0'; // add terminator if possible + return buff; + } + else if (IsImmString(paramType)) + { + thread->IncPtr(1); // already processed paramType + auto str = (char*)thread->GetBytePointer(); + + switch (paramType) + { + case DT_TEXTLABEL: + { + memcpy(buff, str, min(buffLen, 8)); + thread->IncPtr(8); // text data + return buff; + } + + case DT_STRING: + { + memcpy(buff, str, min(buffLen, 16)); + thread->IncPtr(16); // ext data + return buff; + } + } + } + else if (IsVarString(paramType)) + { + switch (paramType) + { + // short string variable + case DT_VAR_TEXTLABEL: + case DT_LVAR_TEXTLABEL: + case DT_VAR_TEXTLABEL_ARRAY: + case DT_LVAR_TEXTLABEL_ARRAY: + { + auto str = (char*)GetScriptParamPointer(thread); + memcpy(buff, str, min(buffLen, 8)); + if (buffLen > 8) buff[8] = '\0'; // add terminator if possible + return buff; + } + + // long string variable + case DT_VAR_STRING: + case DT_LVAR_STRING: + case DT_VAR_STRING_ARRAY: + case DT_LVAR_STRING_ARRAY: + { + auto str = (char*)GetScriptParamPointer(thread); + memcpy(buff, str, min(buffLen, 16)); + if (buffLen > 16) buff[16] = '\0'; // add terminator if possible + return buff; + } + } } + + // unsupported param type + LOG_WARNING(thread, "Argument #%d expected to be string, got %s in script %s", CLEO_GetParamsHandledCount(), ToKindStr(paramType, arrayType), ScriptInfoStr(thread).c_str()); + GetScriptParams(thread, 1); // try skip unhandled param + return nullptr; // error } SCRIPT_VAR * __fastcall _GetScriptParamPointer2(CRunningScript *pScript, int dummy, int unused) @@ -765,7 +850,6 @@ namespace CLEO FUNC_SetScriptParams = gvm.TranslateMemoryAddress(MA_SET_SCRIPT_PARAMS_FUNCTION); FUNC_SetScriptCondResult = gvm.TranslateMemoryAddress(MA_SET_SCRIPT_COND_RESULT_FUNCTION); FUNC_GetScriptParamPointer1 
= gvm.TranslateMemoryAddress(MA_GET_SCRIPT_PARAM_POINTER1_FUNCTION); - FUNC_GetScriptStringParam = gvm.TranslateMemoryAddress(MA_GET_SCRIPT_STRING_PARAM_FUNCTION); FUNC_GetScriptParamPointer2 = gvm.TranslateMemoryAddress(MA_GET_SCRIPT_PARAM_POINTER2_FUNCTION); AddScriptToQueue = reinterpret_cast(_AddScriptToQueue); @@ -777,7 +861,6 @@ namespace CLEO SetScriptParams = reinterpret_cast(_SetScriptParams); SetScriptCondResult = reinterpret_cast(_SetScriptCondResult); GetScriptParamPointer1 = reinterpret_cast(_GetScriptParamPointer1); - GetScriptStringParam = reinterpret_cast(_GetScriptStringParam); GetScriptParamPointer2 = reinterpret_cast(_GetScriptParamPointer2); SaveScmData = gvm.TranslateMemoryAddress(MA_SAVE_SCM_DATA_FUNCTION); @@ -796,6 +879,13 @@ namespace CLEO inj.MemoryReadOffset(addr.address + 1, ProcessScript); inj.ReplaceFunction(HOOK_ProcessScript, addr); + inj.InjectFunction(GetScriptStringParam, gvm.TranslateMemoryAddress(MA_GET_SCRIPT_STRING_PARAM_FUNCTION)); + // setup ScrLog plugin to not patch it again + auto scrLogConfig = FS::absolute("scrlog.ini"); + if (FS::is_regular_file(scrLogConfig)) WritePrivateProfileString("CONFIG", "HOOK_COLLECT_STRING", "FALSE", scrLogConfig.string().c_str()); + scrLogConfig = FS::absolute("scripts\\scrlog.ini"); + if (FS::is_regular_file(scrLogConfig)) WritePrivateProfileString("CONFIG", "HOOK_COLLECT_STRING", "FALSE", scrLogConfig.string().c_str()); + scriptSprites = gvm.TranslateMemoryAddress(MA_SCRIPT_SPRITE_ARRAY); scriptDraws = gvm.TranslateMemoryAddress(MA_SCRIPT_DRAW_ARRAY); scriptTexts = gvm.TranslateMemoryAddress(MA_SCRIPT_TEXT_ARRAY); @@ -1112,6 +1202,8 @@ namespace CLEO return true; } + + return false; }; // standard scripts diff --git a/source/CScriptEngine.h b/source/CScriptEngine.h index 1507a9aa..6364440b 100644 --- a/source/CScriptEngine.h +++ b/source/CScriptEngine.h @@ -154,9 +154,10 @@ namespace CLEO extern void(__thiscall * SetScriptParams)(CRunningScript *, int count); extern void(__thiscall * 
SetScriptCondResult)(CRunningScript *, bool); extern SCRIPT_VAR * (__thiscall * GetScriptParamPointer1)(CRunningScript *); - extern void(__thiscall * GetScriptStringParam)(CRunningScript *, char* buf, BYTE len); extern SCRIPT_VAR * (__thiscall * GetScriptParamPointer2)(CRunningScript *, int __unused__); + char* __fastcall GetScriptStringParam(CRunningScript* thread, int dummy, char* buff, int buffLen); + inline SCRIPT_VAR * GetScriptParamPointer(CRunningScript *thread) { SCRIPT_VAR* ptr = GetScriptParamPointer2(thread, 0); From 92280400cb65993ec3810d738e22ee3a8b397557 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sun, 25 Feb 2024 21:41:47 +0100 Subject: [PATCH 089/216] Audio plugin (#63) * Introduced Audio plugin. * Added opcode set_audio_stream_source_size * Refactoring. Review fixes. * Two new opcodes. * Test files added to release pack. * fixup! Test files added to release pack. * Review fix. * Review fix. --- .github/workflows/main.yml | 3 +- CHANGELOG.md | 18 +- CLEO5.vcxproj | 14 +- cleo_plugins/Audio/Audio.cpp | 363 +++++++++++++++ cleo_plugins/Audio/Audio.filters | 43 ++ cleo_plugins/Audio/Audio.vcxproj | 156 +++++++ cleo_plugins/Audio/Audio.vcxproj.filters | 70 +++ cleo_plugins/Audio/C3DAudioStream.cpp | 81 ++++ cleo_plugins/Audio/C3DAudioStream.h | 26 ++ cleo_plugins/Audio/CAudioStream.cpp | 212 +++++++++ cleo_plugins/Audio/CAudioStream.h | 71 +++ cleo_plugins/Audio/CSoundSystem.cpp | 206 +++++++++ cleo_plugins/Audio/CSoundSystem.h | 49 +++ .../Audio}/bass/bass.dll | Bin .../Audio}/bass/bass.h | 0 cleo_plugins/CLEO_Plugins.sln | 6 + cleo_sdk/CLEO.h | 5 +- cleo_sdk/CLEO_Utils.h | 28 +- source/CCustomOpcodeSystem.cpp | 183 +------- source/CCustomOpcodeSystem.h | 1 - source/CDebug.cpp | 2 +- source/CScriptEngine.cpp | 6 +- source/CSoundSystem.cpp | 415 ------------------ source/CSoundSystem.h | 115 ----- source/CleoBase.cpp | 85 ++-- source/CleoBase.h | 13 +- ...ests_runner.txt => .cleo_tests_runner.txt} | 2 +- tests/cleo_tests/0AEE.s | Bin 0 -> 505 bytes 
tests/cleo_tests/0AEE.txt | 60 +++ tests/cleo_tests/Audio/0AAC.s | Bin 0 -> 434 bytes tests/cleo_tests/Audio/0AAC.txt | 44 ++ tests/cleo_tests/Audio/0AAD.s | Bin 0 -> 299 bytes tests/cleo_tests/Audio/0AAD.txt | 39 ++ tests/cleo_tests/Audio/0AAE.s | Bin 0 -> 285 bytes tests/cleo_tests/Audio/0AAE.txt | 39 ++ tests/cleo_tests/Audio/0AAF.s | Bin 0 -> 417 bytes tests/cleo_tests/Audio/0AAF.txt | 46 ++ tests/cleo_tests/Audio/0AB9.s | Bin 0 -> 843 bytes tests/cleo_tests/Audio/0AB9.txt | 76 ++++ tests/cleo_tests/Audio/0ABB.s | Bin 0 -> 426 bytes tests/cleo_tests/Audio/0ABB.txt | 46 ++ tests/cleo_tests/Audio/0ABC.s | Bin 0 -> 684 bytes tests/cleo_tests/Audio/0ABC.txt | 64 +++ tests/cleo_tests/Audio/0AC0.s | Bin 0 -> 802 bytes tests/cleo_tests/Audio/0AC0.txt | 76 ++++ tests/cleo_tests/Audio/0AC1.s | Bin 0 -> 449 bytes tests/cleo_tests/Audio/0AC1.txt | 44 ++ tests/cleo_tests/Audio/0AC2.s | Bin 0 -> 1099 bytes tests/cleo_tests/Audio/0AC2.txt | 86 ++++ tests/cleo_tests/Audio/0AC4.s | Bin 0 -> 644 bytes tests/cleo_tests/Audio/0AC4.txt | 61 +++ tests/cleo_tests/Audio/Ding.mp3 | Bin 0 -> 3311 bytes .../FilesystemOperations/0A99.s | Bin .../FilesystemOperations/0A99.txt | 0 .../FilesystemOperations/0A9A.s | Bin .../FilesystemOperations/0A9A.txt | 0 .../FilesystemOperations/0A9B.s | Bin .../FilesystemOperations/0A9B.txt | 0 .../FilesystemOperations/0A9C.s | Bin .../FilesystemOperations/0A9C.txt | 0 .../FilesystemOperations/0A9D.s | Bin .../FilesystemOperations/0A9D.txt | 0 .../{ => cleo_tests}/MemoryOperations/0A8C.s | Bin .../MemoryOperations/0A8C.txt | 0 .../{ => cleo_tests}/MemoryOperations/0A8D.s | Bin .../MemoryOperations/0A8D.txt | 0 .../{ => cleo_tests}/MemoryOperations/0A96.s | Bin .../MemoryOperations/0A96.txt | 0 .../{ => cleo_tests}/MemoryOperations/0A97.s | Bin .../MemoryOperations/0A97.txt | 0 .../{ => cleo_tests}/MemoryOperations/0A98.s | Bin .../MemoryOperations/0A98.txt | 0 .../{ => cleo_tests}/MemoryOperations/0AC6.s | Bin .../MemoryOperations/0AC6.txt | 0 .../{ => 
cleo_tests}/MemoryOperations/0AC7.s | Bin .../MemoryOperations/0AC7.txt | 0 .../{ => cleo_tests}/MemoryOperations/0AC8.s | Bin .../MemoryOperations/0AC8.txt | 0 .../{ => cleo_tests}/MemoryOperations/0AC9.s | Bin .../MemoryOperations/0AC9.txt | 0 .../MemoryOperations/0AE9.txt | 0 .../{ => cleo_tests}/MemoryOperations/0AEA.s | Bin .../MemoryOperations/0AEA.txt | 0 .../{ => cleo_tests}/MemoryOperations/0AEB.s | Bin .../MemoryOperations/0AEB.txt | 0 .../MemoryOperations/0AEC.txt | 0 .../{ => cleo_tests}/MemoryOperations/2400.s | Bin .../MemoryOperations/2400.txt | 0 .../{ => cleo_tests}/MemoryOperations/2401.s | Bin .../MemoryOperations/2401.txt | 0 .../{ => cleo_tests}/MemoryOperations/2402.s | Bin .../MemoryOperations/2402.txt | 0 .../{ => cleo_tests}/MemoryOperations/2403.s | Bin .../MemoryOperations/2403.txt | 0 .../{ => cleo_tests}/MemoryOperations/2404.s | Bin .../MemoryOperations/2404.txt | 0 tests/cleo_tests_runner.cs | Bin 581 -> 0 bytes tests/test_file_read_write.txt | 205 --------- third-party/bass/bass.lib | Bin 25944 -> 0 bytes 99 files changed, 2079 insertions(+), 980 deletions(-) create mode 100644 cleo_plugins/Audio/Audio.cpp create mode 100644 cleo_plugins/Audio/Audio.filters create mode 100644 cleo_plugins/Audio/Audio.vcxproj create mode 100644 cleo_plugins/Audio/Audio.vcxproj.filters create mode 100644 cleo_plugins/Audio/C3DAudioStream.cpp create mode 100644 cleo_plugins/Audio/C3DAudioStream.h create mode 100644 cleo_plugins/Audio/CAudioStream.cpp create mode 100644 cleo_plugins/Audio/CAudioStream.h create mode 100644 cleo_plugins/Audio/CSoundSystem.cpp create mode 100644 cleo_plugins/Audio/CSoundSystem.h rename {third-party => cleo_plugins/Audio}/bass/bass.dll (100%) rename {third-party => cleo_plugins/Audio}/bass/bass.h (100%) delete mode 100644 source/CSoundSystem.cpp delete mode 100644 source/CSoundSystem.h rename tests/{cleo_tests_runner.txt => .cleo_tests_runner.txt} (97%) create mode 100644 tests/cleo_tests/0AEE.s create mode 100644 
tests/cleo_tests/0AEE.txt create mode 100644 tests/cleo_tests/Audio/0AAC.s create mode 100644 tests/cleo_tests/Audio/0AAC.txt create mode 100644 tests/cleo_tests/Audio/0AAD.s create mode 100644 tests/cleo_tests/Audio/0AAD.txt create mode 100644 tests/cleo_tests/Audio/0AAE.s create mode 100644 tests/cleo_tests/Audio/0AAE.txt create mode 100644 tests/cleo_tests/Audio/0AAF.s create mode 100644 tests/cleo_tests/Audio/0AAF.txt create mode 100644 tests/cleo_tests/Audio/0AB9.s create mode 100644 tests/cleo_tests/Audio/0AB9.txt create mode 100644 tests/cleo_tests/Audio/0ABB.s create mode 100644 tests/cleo_tests/Audio/0ABB.txt create mode 100644 tests/cleo_tests/Audio/0ABC.s create mode 100644 tests/cleo_tests/Audio/0ABC.txt create mode 100644 tests/cleo_tests/Audio/0AC0.s create mode 100644 tests/cleo_tests/Audio/0AC0.txt create mode 100644 tests/cleo_tests/Audio/0AC1.s create mode 100644 tests/cleo_tests/Audio/0AC1.txt create mode 100644 tests/cleo_tests/Audio/0AC2.s create mode 100644 tests/cleo_tests/Audio/0AC2.txt create mode 100644 tests/cleo_tests/Audio/0AC4.s create mode 100644 tests/cleo_tests/Audio/0AC4.txt create mode 100644 tests/cleo_tests/Audio/Ding.mp3 rename tests/{ => cleo_tests}/FilesystemOperations/0A99.s (100%) rename tests/{ => cleo_tests}/FilesystemOperations/0A99.txt (100%) rename tests/{ => cleo_tests}/FilesystemOperations/0A9A.s (100%) rename tests/{ => cleo_tests}/FilesystemOperations/0A9A.txt (100%) rename tests/{ => cleo_tests}/FilesystemOperations/0A9B.s (100%) rename tests/{ => cleo_tests}/FilesystemOperations/0A9B.txt (100%) rename tests/{ => cleo_tests}/FilesystemOperations/0A9C.s (100%) rename tests/{ => cleo_tests}/FilesystemOperations/0A9C.txt (100%) rename tests/{ => cleo_tests}/FilesystemOperations/0A9D.s (100%) rename tests/{ => cleo_tests}/FilesystemOperations/0A9D.txt (100%) rename tests/{ => cleo_tests}/MemoryOperations/0A8C.s (100%) rename tests/{ => cleo_tests}/MemoryOperations/0A8C.txt (100%) rename tests/{ => 
cleo_tests}/MemoryOperations/0A8D.s (100%) rename tests/{ => cleo_tests}/MemoryOperations/0A8D.txt (100%) rename tests/{ => cleo_tests}/MemoryOperations/0A96.s (100%) rename tests/{ => cleo_tests}/MemoryOperations/0A96.txt (100%) rename tests/{ => cleo_tests}/MemoryOperations/0A97.s (100%) rename tests/{ => cleo_tests}/MemoryOperations/0A97.txt (100%) rename tests/{ => cleo_tests}/MemoryOperations/0A98.s (100%) rename tests/{ => cleo_tests}/MemoryOperations/0A98.txt (100%) rename tests/{ => cleo_tests}/MemoryOperations/0AC6.s (100%) rename tests/{ => cleo_tests}/MemoryOperations/0AC6.txt (100%) rename tests/{ => cleo_tests}/MemoryOperations/0AC7.s (100%) rename tests/{ => cleo_tests}/MemoryOperations/0AC7.txt (100%) rename tests/{ => cleo_tests}/MemoryOperations/0AC8.s (100%) rename tests/{ => cleo_tests}/MemoryOperations/0AC8.txt (100%) rename tests/{ => cleo_tests}/MemoryOperations/0AC9.s (100%) rename tests/{ => cleo_tests}/MemoryOperations/0AC9.txt (100%) rename tests/{ => cleo_tests}/MemoryOperations/0AE9.txt (100%) rename tests/{ => cleo_tests}/MemoryOperations/0AEA.s (100%) rename tests/{ => cleo_tests}/MemoryOperations/0AEA.txt (100%) rename tests/{ => cleo_tests}/MemoryOperations/0AEB.s (100%) rename tests/{ => cleo_tests}/MemoryOperations/0AEB.txt (100%) rename tests/{ => cleo_tests}/MemoryOperations/0AEC.txt (100%) rename tests/{ => cleo_tests}/MemoryOperations/2400.s (100%) rename tests/{ => cleo_tests}/MemoryOperations/2400.txt (100%) rename tests/{ => cleo_tests}/MemoryOperations/2401.s (100%) rename tests/{ => cleo_tests}/MemoryOperations/2401.txt (100%) rename tests/{ => cleo_tests}/MemoryOperations/2402.s (100%) rename tests/{ => cleo_tests}/MemoryOperations/2402.txt (100%) rename tests/{ => cleo_tests}/MemoryOperations/2403.s (100%) rename tests/{ => cleo_tests}/MemoryOperations/2403.txt (100%) rename tests/{ => cleo_tests}/MemoryOperations/2404.s (100%) rename tests/{ => cleo_tests}/MemoryOperations/2404.txt (100%) delete mode 100644 
tests/cleo_tests_runner.cs delete mode 100644 tests/test_file_read_write.txt delete mode 100644 third-party/bass/bass.lib diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d501a961..1c220f58 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -49,11 +49,12 @@ jobs: mkdir .output\Release\cleo_readme @REM copy files - copy third-party\bass\bass.dll .output\Release\bass.dll copy source\cleo_config.ini .output\Release\cleo\.cleo_config.ini copy cleo_plugins\.output\*.cleo .output\Release\cleo\cleo_plugins copy cleo_plugins\.output\*.cleo5 .output\Release\cleo\cleo_plugins copy cleo_plugins\.output\*.ini .output\Release\cleo\cleo_plugins + copy cleo_plugins\Audio\bass\bass.dll .output\Release\bass.dll + xcopy /E /I tests\ .output\cleo @REM install Silent's ASI Loader curl https://silent.rockstarvision.com/uploads/silents_asi_loader_13.zip -o silents_asi_loader_13.zip diff --git a/CHANGELOG.md b/CHANGELOG.md index 73acc8ff..54380bf4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,18 @@ ## 5.0.0 - support for CLEO modules feature https://github.com/sannybuilder/dev/issues/264 +- new [Audio](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/Audio) plugin + - audio related opcodes moved from CLEO core into separated plugin + - CLEO's audio now obey game's volume settings + - implemented Doppler effect for 3d audio streams (fast moving sound sources) + - CLEO's audio now follows game speed changes + - new opcode **2500 ([is_audio_stream_playing](https://library.sannybuilder.com/#/sa/audio/2500))** + - new opcode **2501 ([get_audio_stream_duration](https://library.sannybuilder.com/#/sa/audio/2501))** + - new opcode **2502 ([get_audio_stream_speed](https://library.sannybuilder.com/#/sa/audio/2502))** + - new opcode **2503 ([set_audio_stream_speed](https://library.sannybuilder.com/#/sa/audio/2503))** + - new opcode **2504 
([set_audio_stream_volume_with_transition](https://library.sannybuilder.com/#/sa/audio/2504))** + - new opcode **2505 ([set_audio_stream_speed_with_transition](https://library.sannybuilder.com/#/sa/audio/2505))** + - new opcode **2506 ([set_audio_stream_source_size](https://library.sannybuilder.com/#/sa/audio/2506))** - new [DebugUtils](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/DebugUtils) plugin - new opcode **00C3 ([debug_on](https://library.sannybuilder.com/#/sa/debug/00C3))** - new opcode **00C4 ([debug_off](https://library.sannybuilder.com/#/sa/debug/00C4))** @@ -9,9 +21,9 @@ - new opcode **2102 ([log_to_file](https://library.sannybuilder.com/#/sa/debug/2102))** - implemented support of opcodes **0662**, **0663** and **0664** (original Rockstar's script debugging opcodes. See DebugUtils.ini) - new [FileSystemOperations](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/FileSystemOperations) plugin - - file related opcodes moved from CLEO into separated plugin + - file related opcodes moved from CLEO core into separated plugin - opcode **0A9E ([write_to_file](https://library.sannybuilder.com/#/sa/file/0A9E))** now supports literal numbers and strings - - fixed bug preventing file stream opcodes from working correctly for read-write modes + - fixed bug causing file stream opcodes not working correctly when read-write modes are used - fixed buffer overflows in file stream read opcodes - added/fixed support of all file stream opcodes in legacy mode (Cleo3) - new opcode **2300 ([get_file_position](https://library.sannybuilder.com/#/sa/file/2300))** @@ -19,7 +31,7 @@ - **2302 ([resolve_filepath](https://library.sannybuilder.com/#/sa/file/2302))** - **2303 ([get_script_filename](https://library.sannybuilder.com/#/sa/file/2303))** - new [MemoryOperations](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/MemoryOperations) plugin - - memory related opcodes moved from CLEO into separated plugin + - memory related opcodes moved 
from CLEO core into separated plugin - validation of input and output parameters for all opcodes - opcode **0A8C ([write_memory](https://library.sannybuilder.com/#/sa/memory/0A8C))** now supports strings - new opcode **2400 ([copy_memory](https://library.sannybuilder.com/#/sa/memory/2400))** diff --git a/CLEO5.vcxproj b/CLEO5.vcxproj index 6c41675f..0c7a0970 100644 --- a/CLEO5.vcxproj +++ b/CLEO5.vcxproj @@ -49,7 +49,6 @@ - @@ -77,7 +76,6 @@ - @@ -153,7 +151,7 @@ true true MultiThreaded - $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared\;$(PLUGIN_SDK_DIR)\shared\game\;$(SolutionDir)\third-party\bass;%(AdditionalIncludeDirectories) + $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared\;$(PLUGIN_SDK_DIR)\shared\game\;%(AdditionalIncludeDirectories) _NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;%(PreprocessorDefinitions);TARGET_NAME=R"($(TargetName))" /Zc:threadSafeInit- %(AdditionalOptions) Create @@ -164,8 +162,7 @@ true true UseLinkTimeCodeGeneration - $(SolutionDir)\third-party\bass;%(AdditionalLibraryDirectories) - bass.lib;%(AdditionalDependencies) + %(AdditionalDependencies) Windows $(SolutionDir)source\cleo.def false @@ -185,7 +182,7 @@ xcopy /Y "$(OutDir)$(TargetName).pdb" "$(GTA_SA_DIR)\" Disabled true MultiThreadedDebug - $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(SolutionDir)\third-party\bass;%(AdditionalIncludeDirectories) + $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;%(AdditionalIncludeDirectories) _DEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;_SCL_SECURE_NO_WARNINGS;GTASA;%(PreprocessorDefinitions);;TARGET_NAME=R"($(TargetName))" /Zc:threadSafeInit- %(AdditionalOptions) Create @@ -194,8 +191,9 @@ xcopy /Y "$(OutDir)$(TargetName).pdb" "$(GTA_SA_DIR)\" true Default - $(SolutionDir)\third-party\bass;%(AdditionalLibraryDirectories) - 
bass.lib;%(AdditionalDependencies) + + + %(AdditionalDependencies) Windows $(SolutionDir)source\cleo.def false diff --git a/cleo_plugins/Audio/Audio.cpp b/cleo_plugins/Audio/Audio.cpp new file mode 100644 index 00000000..d5009a9d --- /dev/null +++ b/cleo_plugins/Audio/Audio.cpp @@ -0,0 +1,363 @@ +#include "CLEO.h" +#include "CLEO_Utils.h" +#include "plugin.h" +#include "CTheScripts.h" +#include "CSoundSystem.h" +#include "CAudioStream.h" + +using namespace CLEO; +using namespace plugin; + +#define VALIDATE_STREAM() if(!soundSystem.HasStream(stream)) { SHOW_ERROR("Invalid or already closed '0x%X' audio stream handle param in script %s \nScript suspended.", stream, ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + +class Audio +{ +public: + static CSoundSystem soundSystem; + + Audio() + { + auto cleoVer = CLEO_GetVersion(); + if (cleoVer < CLEO_VERSION) + { + auto err = StringPrintf("This plugin requires version %X or later! \nCurrent version of CLEO is %X.", CLEO_VERSION >> 8, cleoVer >> 8); + MessageBox(HWND_DESKTOP, err.c_str(), TARGET_NAME, MB_SYSTEMMODAL | MB_ICONERROR); + return; + } + + // register opcodes + CLEO_RegisterOpcode(0x0AAC, opcode_0AAC); // load_audiostream + CLEO_RegisterOpcode(0x0AAD, opcode_0AAD); // set_audio_stream_state + CLEO_RegisterOpcode(0x0AAE, opcode_0AAE); // remove_audio_stream + CLEO_RegisterOpcode(0x0AAF, opcode_0AAF); // get_audiostream_length + + CLEO_RegisterOpcode(0x0AB9, opcode_0AB9); // get_audio_stream_state + + CLEO_RegisterOpcode(0x0ABB, opcode_0ABB); // get_audio_stream_volume + CLEO_RegisterOpcode(0x0ABC, opcode_0ABC); // set_audio_stream_volume + + CLEO_RegisterOpcode(0x0AC0, opcode_0AC0); // loop_audiostream + CLEO_RegisterOpcode(0x0AC1, opcode_0AC1); // load_audiostream_with_3d_support + CLEO_RegisterOpcode(0x0AC2, opcode_0AC2); // set_play_3d_audio_stream_at_coords + CLEO_RegisterOpcode(0x0AC3, opcode_0AC3); // set_play_3d_audio_stream_at_object + CLEO_RegisterOpcode(0x0AC4, opcode_0AC4); // 
set_play_3d_audio_stream_at_char + CLEO_RegisterOpcode(0x0AC5, opcode_0AC5); // set_play_3d_audio_stream_at_vehicle + + CLEO_RegisterOpcode(0x2500, opcode_2500); // is_audio_stream_playing + CLEO_RegisterOpcode(0x2501, opcode_2501); // get_audiostream_duration + CLEO_RegisterOpcode(0x2502, opcode_2502); // get_audio_stream_speed + CLEO_RegisterOpcode(0x2503, opcode_2503); // set_audio_stream_speed + CLEO_RegisterOpcode(0x2504, opcode_2504); // set_audio_stream_volume_with_transition + CLEO_RegisterOpcode(0x2505, opcode_2505); // set_audio_stream_speed_with_transition + CLEO_RegisterOpcode(0x2506, opcode_2506); // set_audio_stream_source_size + CLEO_RegisterOpcode(0x2507, opcode_2507); // get_audio_stream_progress + CLEO_RegisterOpcode(0x2508, opcode_2508); // set_audio_stream_progress + + // register event callbacks + CLEO_RegisterCallback(eCallbackId::GameBegin, OnGameBegin); + CLEO_RegisterCallback(eCallbackId::GameProcess, OnGameProcess); + CLEO_RegisterCallback(eCallbackId::GameEnd, OnGameEnd); + CLEO_RegisterCallback(eCallbackId::DrawingFinished, OnDrawingFinished); + CLEO_RegisterCallback(eCallbackId::MainWindowFocus, OnMainWindowFocus); + } + + static void __stdcall OnGameBegin(DWORD saveSlot) + { + soundSystem.Init(); + } + + static void __stdcall OnGameProcess() + { + soundSystem.Process(); + } + + static void __stdcall OnGameEnd() + { + soundSystem.Clear(); + } + + static void __stdcall OnDrawingFinished() + { + if (CTimer::m_UserPause) // main menu visible + soundSystem.Process(); + } + + static void __stdcall OnMainWindowFocus(bool active) + { + if (active) + soundSystem.Resume(); + else + soundSystem.Pause(); + } + + + //0AAC=2, %2d% = load_audiostream %1d% // IF and SET + static OpcodeResult __stdcall opcode_0AAC(CScriptThread* thread) + { + auto path = OPCODE_READ_PARAM_FILEPATH(); + + auto ptr = soundSystem.CreateStream(path); + + OPCODE_WRITE_PARAM_PTR(ptr); + OPCODE_CONDITION_RESULT(ptr != nullptr); + return OR_CONTINUE; + } + + 
//0AAD=2,set_audiostream %1d% perform_action %2d% + static OpcodeResult __stdcall opcode_0AAD(CScriptThread* thread) + { + auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); VALIDATE_STREAM() + auto action = OPCODE_READ_PARAM_INT(); + + if (stream) + { + switch (action) + { + case 0: stream->Stop(); break; + case 1: stream->Play(); break; + case 2: stream->Pause(); break; + case 3: stream->Resume(); break; + default: + LOG_WARNING(thread, "Unknown audiostream's action (%d) in script %s", action, ScriptInfoStr(thread).c_str()); + } + } + + return OR_CONTINUE; + } + + //0AAE=1,release_audiostream %1d% + static OpcodeResult __stdcall opcode_0AAE(CScriptThread* thread) + { + auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + + soundSystem.DestroyStream(stream); + + return OR_CONTINUE; + } + + //0AAF=2,%2d% = get_audiostream_length %1d% + static OpcodeResult __stdcall opcode_0AAF(CScriptThread* thread) + { + auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + + auto length = stream->GetLength(); + + OPCODE_WRITE_PARAM_INT((int)length); + return OR_CONTINUE; + } + + //0AB9=2,get_audio_stream_state %1d% store_to %2d% + static OpcodeResult __stdcall opcode_0AB9(CScriptThread* thread) + { + auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + + auto state = stream->GetState(); + + OPCODE_WRITE_PARAM_INT(state); + return OR_CONTINUE; + } + + //0ABB=2,%2d% = get_audio_stream_volume %1d% + static OpcodeResult __stdcall opcode_0ABB(CScriptThread* thread) + { + auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + + auto volume = stream->GetVolume(); + + OPCODE_WRITE_PARAM_FLOAT(volume); + return OR_CONTINUE; + } + + //0ABC=2,set_audiostream %1d% volume %2d% + static OpcodeResult __stdcall opcode_0ABC(CScriptThread* thread) + { + auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + auto volume = OPCODE_READ_PARAM_FLOAT(); + + 
stream->SetVolume(volume); + + return OR_CONTINUE; + } + + //0AC0=2,loop_audiostream %1d% flag %2d% + static OpcodeResult __stdcall opcode_0AC0(CScriptThread* thread) + { + auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + auto loop = OPCODE_READ_PARAM_BOOL(); + + stream->SetLooping(loop); + + return OR_CONTINUE; + } + + //0AC1=2,%2d% = load_audiostream_with_3d_support %1d% //IF and SET + static OpcodeResult __stdcall opcode_0AC1(CScriptThread* thread) + { + auto path = OPCODE_READ_PARAM_FILEPATH(); + + auto ptr = soundSystem.CreateStream(path, true); + + OPCODE_WRITE_PARAM_PTR(ptr); + OPCODE_CONDITION_RESULT(ptr != nullptr); + return OR_CONTINUE; + } + + //0AC2=4,set_3d_audiostream %1d% position %2d% %3d% %4d% + static OpcodeResult __stdcall opcode_0AC2(CScriptThread* thread) + { + auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + CVector pos; + pos.x = OPCODE_READ_PARAM_FLOAT(); + pos.y = OPCODE_READ_PARAM_FLOAT(); + pos.z = OPCODE_READ_PARAM_FLOAT(); + + stream->Set3dPosition(pos); + return OR_CONTINUE; + } + + //0AC3=2,link_3d_audiostream %1d% to_object %2d% + static OpcodeResult __stdcall opcode_0AC3(CScriptThread* thread) + { + auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + auto handle = OPCODE_READ_PARAM_OBJECT_HANDLE(); + + auto object = CPools::GetObject(handle); + stream->Link(object); + + return OR_CONTINUE; + } + + //0AC4=2,link_3d_audiostream %1d% to_actor %2d% + static OpcodeResult __stdcall opcode_0AC4(CScriptThread* thread) + { + auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + auto handle = OPCODE_READ_PARAM_PED_HANDLE(); + + auto ped = CPools::GetPed(handle); + stream->Link(ped); + + return OR_CONTINUE; + } + + //0AC5=2,link_3d_audiostream %1d% to_vehicle %2d% + static OpcodeResult __stdcall opcode_0AC5(CScriptThread* thread) + { + auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + auto handle = 
OPCODE_READ_PARAM_VEHICLE_HANDLE(); + + auto vehicle = CPools::GetVehicle(handle); + stream->Link(vehicle); + + return OR_CONTINUE; + } + + //2500=1, is_audio_stream_playing %1d% + static OpcodeResult __stdcall opcode_2500(CScriptThread* thread) + { + auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + + auto state = stream->GetState(); + + OPCODE_CONDITION_RESULT(state == 1); + return OR_CONTINUE; + } + + //2501=2,%2d% = get_audiostream_duration %1d% + static OpcodeResult __stdcall opcode_2501(CScriptThread* thread) + { + auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + + auto length = stream->GetLength(); + + auto speed = stream->GetSpeed(); + if (speed <= 0.0f) + length = FLT_MAX; // it would take forever to play paused + else + length /= speed; // speed corrected + + OPCODE_WRITE_PARAM_FLOAT(length); + return OR_CONTINUE; + } + + //2502=2,get_audio_stream_speed %1d% store_to %2d% + static OpcodeResult __stdcall opcode_2502(CScriptThread* thread) + { + auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + + auto speed = stream->GetSpeed(); + + OPCODE_WRITE_PARAM_FLOAT(speed); + return OR_CONTINUE; + } + + //2503=2,set_audio_stream_speed %1d% speed %2d% + static OpcodeResult __stdcall opcode_2503(CScriptThread* thread) + { + auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + auto speed = OPCODE_READ_PARAM_FLOAT(); + + stream->SetSpeed(speed); + + return OR_CONTINUE; + } + + //2504=3,set_audio_stream_volume_with_transition %1d% volume %2d% time_ms %2d% + static OpcodeResult __stdcall opcode_2504(CScriptThread* thread) + { + auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + auto volume = OPCODE_READ_PARAM_FLOAT(); + auto time = OPCODE_READ_PARAM_INT(); + + stream->SetVolume(volume, 0.001f * time); + + return OR_CONTINUE; + } + + //2505=3,set_audio_stream_speed_with_transition %1d% speed %2d% time_ms %2d% + static OpcodeResult __stdcall 
opcode_2505(CScriptThread* thread) + { + auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + auto speed = OPCODE_READ_PARAM_FLOAT(); + auto time = OPCODE_READ_PARAM_INT(); + + stream->SetSpeed(speed, 0.001f * time); + + return OR_CONTINUE; + } + + //2506=2,set_audio_stream_source_size %1d% radius %2d% + static OpcodeResult __stdcall opcode_2506(CScriptThread* thread) + { + auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + auto radius = OPCODE_READ_PARAM_FLOAT(); + + stream->Set3dSize(radius); + + return OR_CONTINUE; + } + + //2507=2,get_audio_stream_progress %1d% store_to %2d% + static OpcodeResult __stdcall opcode_2507(CScriptThread* thread) + { + auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + + auto progress = stream->GetProgress(); + + OPCODE_WRITE_PARAM_FLOAT(progress); + return OR_CONTINUE; + } + + //2508=2,set_audio_stream_progress %1d% speed %2d% + static OpcodeResult __stdcall opcode_2508(CScriptThread* thread) + { + auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + auto speed = OPCODE_READ_PARAM_FLOAT(); + + stream->SetProgress(speed); + + return OR_CONTINUE; + } +} audioInstance; + +CSoundSystem Audio::soundSystem; + diff --git a/cleo_plugins/Audio/Audio.filters b/cleo_plugins/Audio/Audio.filters new file mode 100644 index 00000000..757b525e --- /dev/null +++ b/cleo_plugins/Audio/Audio.filters @@ -0,0 +1,43 @@ + + + + + + plugin_sdk + + + plugin_sdk + + + plugin_sdk + + + plugin_sdk + + + plugin_sdk + + + plugin_sdk + + + plugin_sdk + + + + + {1903661c-d3a7-4f51-8910-54b32282a46d} + + + {0c8900ae-85e5-4dc1-9d7b-173b6f8cd435} + + + + + cleo_sdk + + + cleo_sdk + + + \ No newline at end of file diff --git a/cleo_plugins/Audio/Audio.vcxproj b/cleo_plugins/Audio/Audio.vcxproj new file mode 100644 index 00000000..4ae049fa --- /dev/null +++ b/cleo_plugins/Audio/Audio.vcxproj @@ -0,0 +1,156 @@ + + + + + Release + Win32 + + + Debug + Win32 + + + + 
{897344A5-1AF1-493A-8B0B-196C0423D5DA} + true + Win32Proj + Audio + 10.0 + Audio + + + + DynamicLibrary + false + MultiByte + v143 + true + + + DynamicLibrary + true + MultiByte + v143 + + + + + + + + + + + + + $(SolutionDir).output\ + $(ProjectDir).obj\$(Configuration)\ + Audio + .cleo5 + + + $(SolutionDir).output\ + $(ProjectDir).obj\$(Configuration)\ + Audio + .cleo5 + + + $(GTA_SA_DIR)\gta_sa.exe + $(GTA_SA_DIR) + false + WindowsLocalDebugger + + + + Level3 + MaxSpeed + true + true + true + MultiThreaded + _NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions);TARGET_NAME=R"($(TargetName))" + /Zc:threadSafeInit- %(AdditionalOptions) + $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared;$(PLUGIN_SDK_DIR)\shared\game;$(SolutionDir)..\cleo_sdk;$(SolutionDir)..\third-party\bass;%(AdditionalIncludeDirectories) + stdcpp17 + + + true + true + true + UseLinkTimeCodeGeneration + $(PLUGIN_SDK_DIR)\output\lib\;$(SolutionDir)..\cleo_sdk\;$(ProjectDir)bass\;%(AdditionalLibraryDirectories) + cleo.lib;%(AdditionalDependencies) + Windows + + + taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" +xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" + + + + + Level3 + Disabled + true + MultiThreadedDebug + _DEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions);TARGET_NAME=R"($(TargetName))" + /Zc:threadSafeInit- %(AdditionalOptions) + $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared\;$(PLUGIN_SDK_DIR)\shared\game\;$(SolutionDir)..\cleo_sdk\;$(ProjectDir)\bass\;%(AdditionalIncludeDirectories) + stdcpp17 + + + true + 
Default + $(PLUGIN_SDK_DIR)\output\lib\;$(SolutionDir)..\cleo_sdk\;$(ProjectDir)bass\;%(AdditionalLibraryDirectories) + cleo.lib;%(AdditionalDependencies) + Windows + + + taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" +xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" + + + + + NotUsing + + + NotUsing + + + NotUsing + + + NotUsing + + + NotUsing + + + NotUsing + + + NotUsing + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/cleo_plugins/Audio/Audio.vcxproj.filters b/cleo_plugins/Audio/Audio.vcxproj.filters new file mode 100644 index 00000000..6edb1ae1 --- /dev/null +++ b/cleo_plugins/Audio/Audio.vcxproj.filters @@ -0,0 +1,70 @@ + + + + + + plugin_sdk + + + plugin_sdk + + + plugin_sdk + + + plugin_sdk + + + plugin_sdk + + + plugin_sdk + + + plugin_sdk + + + + plugin_sdk + + + plugin_sdk + + + plugin_sdk + + + plugin_sdk + + + plugin_sdk + + + + + + + cleo_sdk + + + cleo_sdk + + + + bass + + + + + + + {cd3f4d0b-2948-4ccd-87c9-c05ebb973b25} + + + {995941cc-43c5-43e9-a674-5916cf70a5f7} + + + {06b76bd2-09a7-4369-b83b-0298428ebb4f} + + + \ No newline at end of file diff --git a/cleo_plugins/Audio/C3DAudioStream.cpp b/cleo_plugins/Audio/C3DAudioStream.cpp new file mode 100644 index 00000000..fe7f637a --- /dev/null +++ b/cleo_plugins/Audio/C3DAudioStream.cpp @@ -0,0 +1,81 @@ +#include "C3DAudioStream.h" +#include "CSoundSystem.h" +#include "CLEO_Utils.h" + +using namespace CLEO; + +C3DAudioStream::C3DAudioStream(const char* filepath) : CAudioStream() +{ + unsigned flags = BASS_SAMPLE_3D | BASS_SAMPLE_MONO | BASS_SAMPLE_SOFTWARE; + if (CSoundSystem::useFloatAudio) flags |= BASS_SAMPLE_FLOAT; + + if (!(streamInternal = BASS_StreamCreateFile(FALSE, filepath, 0, 0, flags)) && + !(streamInternal = BASS_StreamCreateURL(filepath, 0, flags, nullptr, nullptr))) + { + LOG_WARNING(0, "Loading 3d-audiostream %s failed. 
Error code: %d", filepath, BASS_ErrorGetCode()); + return; + } + + BASS_ChannelGetAttribute(streamInternal, BASS_ATTRIB_FREQ, &rate); + BASS_ChannelSet3DAttributes(streamInternal, BASS_3DMODE_NORMAL, 3.0f, 1E+12f, -1, -1, -1.0f); + ok = true; +} + +C3DAudioStream::~C3DAudioStream() +{ + if (streamInternal) BASS_StreamFree(streamInternal); +} + +void C3DAudioStream::Set3dPosition(const CVector& pos) +{ + link = nullptr; + position.x = pos.y; + position.y = pos.z; + position.z = pos.x; + BASS_3DVECTOR vel = { 0.0f, 0.0f, 0.0f }; + + BASS_ChannelSet3DPosition(streamInternal, &position, nullptr, &vel); +} + +void C3DAudioStream::Set3dSourceSize(float radius) +{ + BASS_ChannelSet3DAttributes(streamInternal, BASS_3DMODE_NORMAL, radius, 1E+12f, -1, -1, -1.0f); +} + +void C3DAudioStream::Link(CPlaceable* placable) +{ + link = placable; +} + +void C3DAudioStream::Process() +{ + CAudioStream::Process(); + + if (state != Playing) return; // done + + UpdatePosition(); +} + +void C3DAudioStream::UpdatePosition() +{ + if (link) // attached to entity + { + auto prevPos = position; + CVector* pVec = link->m_matrix ? 
&link->m_matrix->pos : &link->m_placement.m_vPosn; + position = BASS_3DVECTOR(pVec->y, pVec->z, pVec->x); + + + // calculate velocity + BASS_3DVECTOR vel = position; + vel.x -= prevPos.x; + vel.y -= prevPos.y; + vel.z -= prevPos.z; + auto timeDelta = 0.001f * (CTimer::m_snTimeInMillisecondsNonClipped - CTimer::m_snPreviousTimeInMillisecondsNonClipped); + vel.x *= timeDelta; + vel.y *= timeDelta; + vel.z *= timeDelta; + + BASS_ChannelSet3DPosition(streamInternal, &position, nullptr, &vel); + } +} + diff --git a/cleo_plugins/Audio/C3DAudioStream.h b/cleo_plugins/Audio/C3DAudioStream.h new file mode 100644 index 00000000..fc319838 --- /dev/null +++ b/cleo_plugins/Audio/C3DAudioStream.h @@ -0,0 +1,26 @@ +#pragma once +#include "CAudioStream.h" + +namespace CLEO +{ + class C3DAudioStream : public CAudioStream + { + public: + C3DAudioStream(const char* filepath); + virtual ~C3DAudioStream(); + + // overloaded actions + virtual void Set3dPosition(const CVector& pos); + virtual void Set3dSourceSize(float radius); + virtual void Link(CPlaceable* placable = nullptr); + virtual void Process(); + + protected: + CPlaceable* link = nullptr; + BASS_3DVECTOR position = { 0.0f, 0.0f, 0.0f }; + + C3DAudioStream(const C3DAudioStream&) = delete; // no copying! + void UpdatePosition(); + }; +} + diff --git a/cleo_plugins/Audio/CAudioStream.cpp b/cleo_plugins/Audio/CAudioStream.cpp new file mode 100644 index 00000000..f6c7d5af --- /dev/null +++ b/cleo_plugins/Audio/CAudioStream.cpp @@ -0,0 +1,212 @@ +#include "CAudioStream.h" +#include "CSoundSystem.h" +#include "CLEO_Utils.h" + +using namespace CLEO; + +CAudioStream::CAudioStream(const char* filepath) +{ + unsigned flags = BASS_SAMPLE_SOFTWARE; + if (CSoundSystem::useFloatAudio) flags |= BASS_SAMPLE_FLOAT; + + if (!(streamInternal = BASS_StreamCreateFile(FALSE, filepath, 0, 0, flags)) && + !(streamInternal = BASS_StreamCreateURL(filepath, 0, flags, 0, nullptr))) + { + LOG_WARNING(0, "Loading audiostream %s failed. 
Error code: %d", filepath, BASS_ErrorGetCode()); + return; + } + + BASS_ChannelGetAttribute(streamInternal, BASS_ATTRIB_FREQ, &rate); + ok = true; +} + +CAudioStream::~CAudioStream() +{ + if (streamInternal) BASS_StreamFree(streamInternal); +} + +void CAudioStream::Play() +{ + if (state == Stopped) BASS_ChannelSetPosition(streamInternal, 0, BASS_POS_BYTE); // rewind + state = PlayingInactive; // needs to be processed +} + +void CAudioStream::Pause(bool changeState) +{ + if (GetState() == Playing) + { + BASS_ChannelPause(streamInternal); + state = changeState ? Paused : PlayingInactive; + } +} + +void CAudioStream::Stop() +{ + BASS_ChannelPause(streamInternal); + state = Stopped; +} + +void CAudioStream::Resume() +{ + Play(); +} + +float CAudioStream::GetLength() const +{ + return (float)BASS_ChannelBytes2Seconds(streamInternal, BASS_ChannelGetLength(streamInternal, BASS_POS_BYTE)); +} + +void CAudioStream::SetProgress(float value) +{ + value = std::clamp(value, 0.0f, 1.0f); + auto total = BASS_ChannelGetLength(streamInternal, BASS_POS_BYTE); + auto bytePos = total * value; + BASS_ChannelSetPosition(streamInternal, bytePos, BASS_POS_BYTE); +} + +float CAudioStream::GetProgress() const +{ + auto total = BASS_ChannelGetLength(streamInternal, BASS_POS_BYTE); + auto bytePos = BASS_ChannelGetPosition(streamInternal, BASS_POS_BYTE); + + float progress = (float)bytePos / total; + progress = std::clamp(progress, 0.0f, 1.0f); + return progress; +} + +CAudioStream::eStreamState CAudioStream::GetState() const +{ + return (state == PlayingInactive) ? Playing : state; +} + +void CAudioStream::SetLooping(bool enable) +{ + BASS_ChannelFlags(streamInternal, enable ? 
BASS_SAMPLE_LOOP : 0, BASS_SAMPLE_LOOP); +} + +bool CLEO::CAudioStream::GetLooping() const +{ + return (BASS_ChannelFlags(streamInternal, 0, 0) & BASS_SAMPLE_LOOP) != 0; +} + +void CAudioStream::SetVolume(float value, float transitionTime) +{ + if (transitionTime > 0.0f) Resume(); + + value = max(value, 0.0f); + volumeTarget = value; + + if (transitionTime <= 0.0) + volume = value; // instant + else + volumeTransitionStep = (volumeTarget - volume) / (1000.0 * transitionTime); +} + +float CAudioStream::GetVolume() const +{ + return (float)volume; +} + +void CAudioStream::SetSpeed(float value, float transitionTime) +{ + if (transitionTime > 0.0f) Resume(); + + value = max(value, 0.0f); + speedTarget = value; + + if (transitionTime <= 0.0) + speed = value; // instant + else + speedTransitionStep = (speedTarget - speed) / (1000.0 * transitionTime); +} + +float CAudioStream::GetSpeed() const +{ + return (float)speed; +} + +void CAudioStream::UpdateVolume() +{ + if (volume != volumeTarget) + { + auto timeDelta = CTimer::m_snTimeInMillisecondsNonClipped - CTimer::m_snPreviousTimeInMillisecondsNonClipped; + volume += volumeTransitionStep * (double)timeDelta; // animate the transition + + // check progress + auto remaining = volumeTarget - volume; + remaining *= (volumeTransitionStep > 0.0) ? 1.0 : -1.0; + if (remaining < 0.0) // overshoot + { + volume = volumeTarget; + if (volume <= 0.0f) Pause(); + } + } + + BASS_ChannelSetAttribute(streamInternal, BASS_ATTRIB_VOL, (float)volume * CSoundSystem::masterVolume); +} + +void CAudioStream::UpdateSpeed() +{ + if (speed != speedTarget) + { + auto timeDelta = CTimer::m_snTimeInMillisecondsNonClipped - CTimer::m_snPreviousTimeInMillisecondsNonClipped; + speed += speedTransitionStep * (double)timeDelta; // animate the transition + + // check progress + auto remaining = speedTarget - speed; + remaining *= (speedTransitionStep > 0.0) ? 
1.0 : -1.0; + if (remaining < 0.0) // overshoot + { + speed = speedTarget; // done + if (speed <= 0.0f) Pause(); + } + } + + float freq = rate * (float)speed * CSoundSystem::masterSpeed; + freq = max(freq, 0.000001f); // 0 results in original speed + BASS_ChannelSetAttribute(streamInternal, BASS_ATTRIB_FREQ, freq); +} + +bool CAudioStream::IsOk() const +{ + return ok; +} + +HSTREAM CAudioStream::GetInternal() +{ + return streamInternal; +} + +void CAudioStream::Process() +{ + if (state == PlayingInactive) + { + BASS_ChannelPlay(streamInternal, FALSE); + state = Playing; + } + + if (!GetLooping() && GetProgress() >= 1.0f) // end reached + { + state = Stopped; + } + + if (state != Playing) return; // done + + UpdateSpeed(); + UpdateVolume(); +} + +void CAudioStream::Set3dPosition(const CVector& pos) +{ + // not applicable for 2d audio +} + +void CAudioStream::Set3dSize(float radius) +{ + // not applicable for 2d audio +} + +void CAudioStream::Link(CPlaceable* placable) +{ + // not applicable for 2d audio +} diff --git a/cleo_plugins/Audio/CAudioStream.h b/cleo_plugins/Audio/CAudioStream.h new file mode 100644 index 00000000..d1302888 --- /dev/null +++ b/cleo_plugins/Audio/CAudioStream.h @@ -0,0 +1,71 @@ +#pragma once +#include "plugin.h" +#include "bass.h" + +namespace CLEO +{ + class CAudioStream + { + public: + enum eStreamState + { + Stopped = -1, + PlayingInactive, // internal: playing, but not processed yet or the sound system is silenced right now + Playing, + Paused, + }; + + CAudioStream(const char* filepath); // filesystem or URL + virtual ~CAudioStream(); + + bool IsOk() const; + HSTREAM GetInternal(); // get BASS stream + + eStreamState GetState() const; + void Play(); + void Pause(bool changeState = true); + void Stop(); + void Resume(); + + void SetLooping(bool enable); + bool GetLooping() const; + + float GetLength() const; + + void SetProgress(float value); + float GetProgress() const; + + void SetSpeed(float value, float transitionTime = 0.0f); + 
float GetSpeed() const; + + void SetVolume(float value, float transitionTime = 0.0f); + float GetVolume() const; + + // 3d + virtual void Set3dPosition(const CVector& pos); + virtual void Set3dSize(float radius); + virtual void Link(CPlaceable* placable = nullptr); + + virtual void Process(); + + protected: + HSTREAM streamInternal = 0; + eStreamState state = Paused; + bool ok = false; + float rate = 44100.0f; // file's sampling rate + double speed = 1.0f; + double volume = 1.0f; + + // transitions + double volumeTarget = 1.0f; + double volumeTransitionStep = 1.0f; + double speedTarget = 1.0f; + double speedTransitionStep = 1.0f; + + CAudioStream() = default; + CAudioStream(const CAudioStream&) = delete; // no copying! + + void UpdateVolume(); + void UpdateSpeed(); + }; +} diff --git a/cleo_plugins/Audio/CSoundSystem.cpp b/cleo_plugins/Audio/CSoundSystem.cpp new file mode 100644 index 00000000..fee34ee8 --- /dev/null +++ b/cleo_plugins/Audio/CSoundSystem.cpp @@ -0,0 +1,206 @@ +#include "CSoundSystem.h" +#include "CAudioStream.h" +#include "C3dAudioStream.h" +#include "CLEO_Utils.h" +#include "CAEAudioHardware.h" +#include "CCamera.h" + +namespace CLEO +{ + bool CSoundSystem::useFloatAudio = false; + BASS_3DVECTOR CSoundSystem::pos(0.0, 0.0, 0.0); + BASS_3DVECTOR CSoundSystem::vel(0.0, 0.0, 0.0); + BASS_3DVECTOR CSoundSystem::front(0.0, -1.0, 0.0); + BASS_3DVECTOR CSoundSystem::top(0.0, 0.0, 1.0); + float CSoundSystem::masterSpeed = 1.0f; + float CSoundSystem::masterVolume = 1.0f; + + void EnumerateBassDevices(int& total, int& enabled, int& default_device) + { + BASS_DEVICEINFO info; + for (default_device = -1, enabled = 0, total = 0; BASS_GetDeviceInfo(total, &info); ++total) + { + if (info.flags & BASS_DEVICE_ENABLED) ++enabled; + if (info.flags & BASS_DEVICE_DEFAULT) default_device = total; + TRACE("Found sound device %d%s: %s", total, default_device == total ? 
+ " (default)" : "", info.name); + } + } + + CSoundSystem::~CSoundSystem() + { + TRACE("Finalizing SoundSystem..."); + Clear(); + + if (initialized) + { + TRACE("Freeing BASS library"); + BASS_Free(); + initialized = false; + } + TRACE("SoundSystem finalized"); + } + + bool CSoundSystem::Init() + { + if (initialized) return true; // already done + + int default_device, total_devices, enabled_devices; + EnumerateBassDevices(total_devices, enabled_devices, default_device); + + BASS_DEVICEINFO info = { nullptr, nullptr, 0 }; + if (forceDevice != -1 && BASS_GetDeviceInfo(forceDevice, &info) && + info.flags & BASS_DEVICE_ENABLED) + default_device = forceDevice; + + TRACE("On system found %d devices, %d enabled devices, assuming device to use: %d (%s)", + total_devices, enabled_devices, default_device, BASS_GetDeviceInfo(default_device, &info) ? + info.name : "Unknown device"); + + if (BASS_Init(default_device, 44100, BASS_DEVICE_3D, RsGlobal.ps->window, nullptr) && + BASS_Set3DFactors(1.0f, 3.0f, 80.0f) && + BASS_Set3DPosition(&pos, &vel, &front, &top)) + { + TRACE("SoundSystem initialized"); + + // Can we use floating-point (HQ) audio streams? + DWORD floatable = BASS_StreamCreate(44100, 1, BASS_SAMPLE_FLOAT, NULL, NULL); // floating-point channel support? 0 = no, else yes + if (floatable) + { + TRACE("Floating-point audio supported!"); + useFloatAudio = true; + BASS_StreamFree(floatable); + } + else TRACE("Floating-point audio not supported!"); + + if (BASS_GetInfo(&SoundDevice)) + { + if (SoundDevice.flags & DSCAPS_EMULDRIVER) + TRACE("Audio drivers not installed - using DirectSound emulation"); + if (!SoundDevice.eax) + TRACE("Audio hardware acceleration disabled (no EAX)"); + } + + initialized = true; + BASS_Apply3D(); + return true; + } + + LOG_WARNING(0, "Could not initialize BASS sound system. 
Error code: %d", BASS_ErrorGetCode()); + return false; + } + + bool CSoundSystem::Initialized() + { + return initialized; + } + + CAudioStream* CSoundSystem::CreateStream(const char *filename, bool in3d) + { + CAudioStream* result = in3d ? new C3DAudioStream(filename) : new CAudioStream(filename); + if (!result->IsOk()) + { + delete result; + return nullptr; + } + + streams.insert(result); + return result; + } + + void CSoundSystem::DestroyStream(CAudioStream *stream) + { + if (streams.erase(stream)) + delete stream; + else + TRACE("Unloading of stream that is not in list of loaded streams"); + } + + bool CSoundSystem::HasStream(CAudioStream* stream) + { + return streams.find(stream) != streams.end(); + } + + void CSoundSystem::Clear() + { + for (auto stream : streams) + { + delete stream; + }; + streams.clear(); + } + + void CSoundSystem::Resume() + { + paused = false; + for (auto stream : streams) + { + if(stream->GetState() == CAudioStream::Playing) stream->Resume(); + } + } + + void CSoundSystem::Pause() + { + paused = true; + for (auto stream : streams) + { + stream->Pause(false); + }; + } + + void CSoundSystem::Process() + { + if (CTimer::m_UserPause || CTimer::m_CodePause) // covers menu pausing, no disc in drive pausing, etc. 
+ { + if (!paused) Pause(); + } + else // not in menu + { + if (paused) Resume(); + + // get game globals + masterSpeed = CTimer::ms_fTimeScale; + masterVolume = AEAudioHardware.m_fEffectMasterScalingFactor * 0.5f; // fit to game's sfx volume + + // camera movements + CMatrixLink * pMatrix = nullptr; + CVector * pVec = nullptr; + if (TheCamera.m_matrix) + { + pMatrix = TheCamera.m_matrix; + pVec = &pMatrix->pos; + } + else pVec = &TheCamera.m_placement.m_vPosn; + + BASS_3DVECTOR prevPos = pos; + pos = BASS_3DVECTOR(pVec->y, pVec->z, pVec->x); + + // calculate velocity + vel = prevPos; + vel.x -= pos.x; + vel.y -= pos.y; + vel.z -= pos.z; + auto timeDelta = 0.001f * (CTimer::m_snTimeInMillisecondsNonClipped - CTimer::m_snPreviousTimeInMillisecondsNonClipped); + vel.x *= timeDelta; + vel.y *= timeDelta; + vel.z *= timeDelta; + + // setup the ears + if (!TheCamera.m_bJust_Switched && !TheCamera.m_bCameraJustRestored) // avoid camera change/jump cut velocity glitches + { + BASS_Set3DPosition( + &pos, + &vel, + pMatrix ? &BASS_3DVECTOR(pMatrix->at.y, pMatrix->at.z, pMatrix->at.x) : nullptr, + pMatrix ? 
&BASS_3DVECTOR(pMatrix->up.y, pMatrix->up.z, pMatrix->up.x) : nullptr + ); + } + + // process streams + for(auto stream : streams) stream->Process(); + + // apply above changes + BASS_Apply3D(); + } + } +} diff --git a/cleo_plugins/Audio/CSoundSystem.h b/cleo_plugins/Audio/CSoundSystem.h new file mode 100644 index 00000000..abcc7cb1 --- /dev/null +++ b/cleo_plugins/Audio/CSoundSystem.h @@ -0,0 +1,49 @@ +#pragma once +#include "bass.h" +#include + +#pragma comment(lib, "bass.lib") + +namespace CLEO +{ + class CAudioStream; + class C3DAudioStream; + + class CSoundSystem + { + friend class CAudioStream; + friend class C3DAudioStream; + + std::set streams; + BASS_INFO SoundDevice = { 0 }; + bool initialized = false; + int forceDevice = -1; + bool paused = false; + + static bool useFloatAudio; + + static BASS_3DVECTOR pos; + static BASS_3DVECTOR vel; + static BASS_3DVECTOR front; + static BASS_3DVECTOR top; + static float masterSpeed; // game simulation speed + static float masterVolume; + + public: + CSoundSystem() = default; // TODO: give to user an ability to force a sound device to use (ini-file or cmd-line?) 
+ ~CSoundSystem(); + + bool Init(); + bool Initialized(); + + CAudioStream* CreateStream(const char *filename, bool in3d = false); + void DestroyStream(CAudioStream *stream); + + bool HasStream(CAudioStream* stream); + void Clear(); // destroy all created streams + + void Pause(); + void Resume(); + void Process(); + }; +} diff --git a/third-party/bass/bass.dll b/cleo_plugins/Audio/bass/bass.dll similarity index 100% rename from third-party/bass/bass.dll rename to cleo_plugins/Audio/bass/bass.dll diff --git a/third-party/bass/bass.h b/cleo_plugins/Audio/bass/bass.h similarity index 100% rename from third-party/bass/bass.h rename to cleo_plugins/Audio/bass/bass.h diff --git a/cleo_plugins/CLEO_Plugins.sln b/cleo_plugins/CLEO_Plugins.sln index 2960e5f4..62d55f52 100644 --- a/cleo_plugins/CLEO_Plugins.sln +++ b/cleo_plugins/CLEO_Plugins.sln @@ -13,6 +13,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DebugUtils", "DebugUtils\De EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MemoryOperations", "MemoryOperations\MemoryOperations.vcxproj", "{35C80F79-8B18-4925-8C32-94B320DBE76F}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Audio", "Audio\Audio.vcxproj", "{897344A5-1AF1-493A-8B0B-196C0423D5DA}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|x86 = Debug|x86 @@ -39,6 +41,10 @@ Global {35C80F79-8B18-4925-8C32-94B320DBE76F}.Debug|x86.Build.0 = Debug|Win32 {35C80F79-8B18-4925-8C32-94B320DBE76F}.Release|x86.ActiveCfg = Release|Win32 {35C80F79-8B18-4925-8C32-94B320DBE76F}.Release|x86.Build.0 = Release|Win32 + {897344A5-1AF1-493A-8B0B-196C0423D5DA}.Debug|x86.ActiveCfg = Debug|Win32 + {897344A5-1AF1-493A-8B0B-196C0423D5DA}.Debug|x86.Build.0 = Debug|Win32 + {897344A5-1AF1-493A-8B0B-196C0423D5DA}.Release|x86.ActiveCfg = Release|Win32 + {897344A5-1AF1-493A-8B0B-196C0423D5DA}.Release|x86.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE 
diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index 2c10e866..0d10c0d7 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -288,6 +288,7 @@ const char DIR_MODULES[] = "modules:"; // game\cleo\modules directory enum class eCallbackId : DWORD { GameBegin, // void WINAPI OnGameBegin(DWORD saveSlot); // -1 if not started from save + GameProcess, // void WINAPI OnGameProcess(); // called once every frame during gameplay GameEnd, // void WINAPI OnGameEnd(); ScriptsLoaded, // void WINAPI OnScriptsLoaded(); ScriptsFinalize, // void WINAPI OnScriptsFinalize(); @@ -299,6 +300,7 @@ enum class eCallbackId : DWORD ScriptDraw, // void WINAPI OnScriptDraw(bool beforeFade); DrawingFinished, // void WINAPI OnDrawingFinished(); // called after game rendered everything and before presenting screen buffer Log, // void OnLog(eLogLevel level, const char* msg); + MainWindowFocus, // void WINAPI OnMainWindowFocus(bool active); // called when game main window focus changes }; // used by CLEO_Log and Log callback @@ -322,7 +324,6 @@ typedef int SCRIPT_HANDLE; typedef SCRIPT_HANDLE HANDLE_ACTOR, ACTOR, HACTOR, PED, HPED, HANDLE_PED; typedef SCRIPT_HANDLE HANDLE_CAR, CAR, HCAR, VEHICLE, HVEHICLE, HANDLE_VEHICLE; typedef SCRIPT_HANDLE HANDLE_OBJECT, OBJECT, HOBJECT; -typedef SCRIPT_HANDLE HSTREAM; #pragma pack(push,1) #ifdef __cplusplus @@ -522,7 +523,7 @@ void WINAPI CLEO_RemoveScriptDeleteDelegate(FuncScriptDeleteDelegateT func); DWORD WINAPI CLEO_GetScriptTextureById(CRunningScript* thread, int id); // ret RwTexture * -HSTREAM WINAPI CLEO_GetInternalAudioStream(CRunningScript* thread, DWORD stream); // arg CAudioStream * +DWORD WINAPI CLEO_GetInternalAudioStream(CRunningScript* thread, DWORD stream); // arg CAudioStream * void WINAPI CLEO_ResolvePath(CRunningScript* thread, char* inOutPath, DWORD pathMaxLen); // convert to absolute (file system) path diff --git a/cleo_sdk/CLEO_Utils.h b/cleo_sdk/CLEO_Utils.h index bb14ce6d..78a81036 100644 --- a/cleo_sdk/CLEO_Utils.h +++ 
b/cleo_sdk/CLEO_Utils.h @@ -417,55 +417,55 @@ namespace CLEO #define OPCODE_READ_PARAM_OBJECT_HANDLE() _readParam(thread).dwParam; \ if (!_paramWasInt()) { SHOW_ERROR("Input argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ - else if (IsObjectHandleValid(_paramsArray[0].dwParam)) { SHOW_ERROR("Invalid object handle '0x%X' input argument #%d in script %s \nScript suspended.", _paramsArray[0].dwParam, CLEO_GetParamsHandledCount(), ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + else if (!IsObjectHandleValid(_paramsArray[0].dwParam)) { SHOW_ERROR("Invalid object handle '0x%X' input argument #%d in script %s \nScript suspended.", _paramsArray[0].dwParam, CLEO_GetParamsHandledCount(), ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_READ_PARAM_PED_HANDLE() _readParam(thread).dwParam; \ if (!_paramWasInt()) { SHOW_ERROR("Input argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ - else if (IsPedHandleValid(_paramsArray[0].dwParam)) { SHOW_ERROR("Invalid character handle '0x%X' input argument #%d in script %s \nScript suspended.", _paramsArray[0].dwParam, CLEO_GetParamsHandledCount(), ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + else if (!IsPedHandleValid(_paramsArray[0].dwParam)) { SHOW_ERROR("Invalid character handle '0x%X' input argument #%d in script %s \nScript suspended.", _paramsArray[0].dwParam, CLEO_GetParamsHandledCount(), ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_READ_PARAM_VEHICLE_HANDLE() _readParam(thread).dwParam; \ if (!_paramWasInt()) { SHOW_ERROR("Input argument #%d expected to be integer, got %s in script 
%s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ - else if (IsVehicleHandleValid(_paramsArray[0].dwParam)) { SHOW_ERROR("Invalid vehicle handle '0x%X' input argument #%d in script %s \nScript suspended.", _paramsArray[0].dwParam, CLEO_GetParamsHandledCount(), ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + else if (!IsVehicleHandleValid(_paramsArray[0].dwParam)) { SHOW_ERROR("Invalid vehicle handle '0x%X' input argument #%d in script %s \nScript suspended.", _paramsArray[0].dwParam, CLEO_GetParamsHandledCount(), ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_READ_PARAM_OUTPUT_VAR() _readParamVariable(thread); \ if (!_paramWasVariable()) { SHOW_ERROR("Output argument #%d expected to be variable, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_READ_PARAM_OUTPUT_VAR_INT() _readParamVariable(thread); \ if (!_paramWasVariable()) { SHOW_ERROR("Output argument #%d expected to be variable, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ - if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be variable int, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define 
OPCODE_READ_PARAM_OUTPUT_VAR_FLOAT() _readParamVariable(thread); \ if (!_paramWasVariable()) { SHOW_ERROR("Output argument #%d expected to be variable, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ - if (!_paramWasFloat(true)) { SHOW_ERROR("Output argument #%d expected to be float, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasFloat(true)) { SHOW_ERROR("Output argument #%d expected to be variable float, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } // macros for writing opcode output params. Performs type validation, throws error and suspends script if user provided invalid argument type #define OPCODE_WRITE_PARAM_BOOL(value) _writeParam(thread, value); \ - if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be variable int, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_WRITE_PARAM_INT8(value) _writeParam(thread, value); \ - if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return 
thread->Suspend(); } + if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be variable int, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_WRITE_PARAM_UINT8(value) _writeParam(thread, value); \ - if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be variable int, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_WRITE_PARAM_INT16(value) _writeParam(thread, value); \ - if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be variable int, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_WRITE_PARAM_UINT16(value) _writeParam(thread, value); \ - if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be variable int, got %s 
in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_WRITE_PARAM_INT(value) _writeParam(thread, value); \ - if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be variable int, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_WRITE_PARAM_UINT(value) _writeParam(thread, value); \ - if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be variable int, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_WRITE_PARAM_FLOAT(value) _writeParam(thread, value); \ - if (!_paramWasFloat(true)) { SHOW_ERROR("Output argument #%d expected to be float, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasFloat(true)) { SHOW_ERROR("Output argument #%d expected to be variable float, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), 
CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_WRITE_PARAM_STRING(value) if(!_writeParamText(thread, value)) { return OpcodeResult::OR_INTERRUPT; } #define OPCODE_WRITE_PARAM_PTR(value) _writeParamPtr(thread, (void*)value); \ - if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be variable int, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } } diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index de92cc6a..04b3fb53 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -29,10 +29,6 @@ namespace CLEO OpcodeResult __stdcall opcode_0AA0(CRunningScript *thread); OpcodeResult __stdcall opcode_0AA1(CRunningScript *thread); OpcodeResult __stdcall opcode_0AA9(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AAC(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AAD(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AAE(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AAF(CRunningScript *thread); OpcodeResult __stdcall opcode_0AB0(CRunningScript *thread); OpcodeResult __stdcall opcode_0AB1(CRunningScript *thread); OpcodeResult __stdcall opcode_0AB2(CRunningScript *thread); @@ -42,19 +38,10 @@ namespace CLEO OpcodeResult __stdcall opcode_0AB6(CRunningScript *thread); OpcodeResult __stdcall opcode_0AB7(CRunningScript *thread); OpcodeResult __stdcall opcode_0AB8(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AB9(CRunningScript *thread); OpcodeResult __stdcall opcode_0ABA(CRunningScript *thread); - OpcodeResult 
__stdcall opcode_0ABB(CRunningScript *thread); - OpcodeResult __stdcall opcode_0ABC(CRunningScript *thread); OpcodeResult __stdcall opcode_0ABD(CRunningScript *thread); OpcodeResult __stdcall opcode_0ABE(CRunningScript *thread); OpcodeResult __stdcall opcode_0ABF(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AC0(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AC1(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AC2(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AC3(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AC4(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AC5(CRunningScript *thread); OpcodeResult __stdcall opcode_0ACA(CRunningScript *thread); OpcodeResult __stdcall opcode_0ACB(CRunningScript *thread); OpcodeResult __stdcall opcode_0ACC(CRunningScript *thread); @@ -214,11 +201,7 @@ namespace CLEO { TRACE("Cleaning up script data..."); - for (void* func : GetInstance().GetCallbacks(eCallbackId::ScriptsFinalize)) - { - typedef void WINAPI callback(void); - ((callback*)func)(); - } + GetInstance().CallCallbacks(eCallbackId::ScriptsFinalize); // clean up after opcode_0AB1 ScmFunction::Clear(); @@ -234,10 +217,6 @@ namespace CLEO CLEO_RegisterOpcode(0x0AA0, opcode_0AA0); CLEO_RegisterOpcode(0x0AA1, opcode_0AA1); CLEO_RegisterOpcode(0x0AA9, opcode_0AA9); - CLEO_RegisterOpcode(0x0AAC, opcode_0AAC); - CLEO_RegisterOpcode(0x0AAD, opcode_0AAD); - CLEO_RegisterOpcode(0x0AAE, opcode_0AAE); - CLEO_RegisterOpcode(0x0AAF, opcode_0AAF); CLEO_RegisterOpcode(0x0AB0, opcode_0AB0); CLEO_RegisterOpcode(0x0AB1, opcode_0AB1); CLEO_RegisterOpcode(0x0AB2, opcode_0AB2); @@ -247,19 +226,10 @@ namespace CLEO CLEO_RegisterOpcode(0x0AB6, opcode_0AB6); CLEO_RegisterOpcode(0x0AB7, opcode_0AB7); CLEO_RegisterOpcode(0x0AB8, opcode_0AB8); - CLEO_RegisterOpcode(0x0AB9, opcode_0AB9); CLEO_RegisterOpcode(0x0ABA, opcode_0ABA); - CLEO_RegisterOpcode(0x0ABB, opcode_0ABB); - CLEO_RegisterOpcode(0x0ABC, opcode_0ABC); 
CLEO_RegisterOpcode(0x0ABD, opcode_0ABD); CLEO_RegisterOpcode(0x0ABE, opcode_0ABE); CLEO_RegisterOpcode(0x0ABF, opcode_0ABF); - CLEO_RegisterOpcode(0x0AC0, opcode_0AC0); - CLEO_RegisterOpcode(0x0AC1, opcode_0AC1); - CLEO_RegisterOpcode(0x0AC2, opcode_0AC2); - CLEO_RegisterOpcode(0x0AC3, opcode_0AC3); - CLEO_RegisterOpcode(0x0AC4, opcode_0AC4); - CLEO_RegisterOpcode(0x0AC5, opcode_0AC5); CLEO_RegisterOpcode(0x0ACA, opcode_0ACA); CLEO_RegisterOpcode(0x0ACB, opcode_0ACB); CLEO_RegisterOpcode(0x0ACC, opcode_0ACC); @@ -985,57 +955,6 @@ namespace CLEO return OR_CONTINUE; } - //0AAC=2, %2d% = load_audiostream %1d% // IF and SET - OpcodeResult __stdcall opcode_0AAC(CRunningScript *thread) - { - auto path = OPCODE_READ_PARAM_STRING(); - auto filename = reinterpret_cast(thread)->ResolvePath(path); - - auto stream = GetInstance().SoundSystem.LoadStream(filename.c_str()); - *thread << stream; - SetScriptCondResult(thread, stream != nullptr); - return OR_CONTINUE; - } - - //0AAD=2,set_audiostream %1d% perform_action %2d% - OpcodeResult __stdcall opcode_0AAD(CRunningScript *thread) - { - CAudioStream *stream; - int action; - *thread >> stream >> action; - if (stream) - { - switch (action) - { - case 0: stream->Stop(); break; - case 1: stream->Play(); break; - case 2: stream->Pause(); break; - case 3: stream->Resume(); break; - default: - LOG_WARNING(thread, "[0AAD] Unknown audiostream's action (%d) in script %s", action, ((CCustomScript*)thread)->GetInfoStr().c_str()); - } - } - return OR_CONTINUE; - } - - //0AAE=1,release_audiostream %1d% - OpcodeResult __stdcall opcode_0AAE(CRunningScript *thread) - { - CAudioStream *stream; - *thread >> stream; - if (stream) GetInstance().SoundSystem.UnloadStream(stream); - return OR_CONTINUE; - } - - //0AAF=2,%2d% = get_audiostream_length %1d% - OpcodeResult __stdcall opcode_0AAF(CRunningScript *thread) - { - CAudioStream *stream; - *thread >> stream; - *thread << (stream ? 
stream->GetLength() : -1); - return OR_CONTINUE; - } - //0AB0=1, key_pressed %1d% OpcodeResult __stdcall opcode_0AB0(CRunningScript *thread) { @@ -1361,15 +1280,6 @@ namespace CLEO return OR_CONTINUE; } - //0AB9=2,get_audiostream %1d% state_to %2d% - OpcodeResult __stdcall opcode_0AB9(CRunningScript *thread) - { - CAudioStream *stream; - *thread >> stream; - *thread << (stream ? stream->GetState() : -1); - return OR_CONTINUE; - } - //0ABA=1,end_custom_thread_named %1d% OpcodeResult __stdcall opcode_0ABA(CRunningScript *thread) { @@ -1383,25 +1293,6 @@ namespace CLEO return deleted_thread == thread ? OR_INTERRUPT : OR_CONTINUE; } - //0ABB=2,%2d% = audiostream %1d% volume - OpcodeResult __stdcall opcode_0ABB(CRunningScript *thread) - { - CAudioStream *stream; - *thread >> stream; - *thread << (stream ? stream->GetVolume() : 0.0f); - return OR_CONTINUE; - } - - //0ABC=2,set_audiostream %1d% volume %2d% - OpcodeResult __stdcall opcode_0ABC(CRunningScript *thread) - { - CAudioStream *stream; - float volume; - *thread >> stream >> volume; - if (stream) stream->SetVolume(volume); - return OR_CONTINUE; - } - //0ABD=1, vehicle %1d% siren_on OpcodeResult __stdcall opcode_0ABD(CRunningScript *thread) { @@ -1438,76 +1329,6 @@ namespace CLEO return OR_CONTINUE; } - //0AC0=2,loop_audiostream %1d% flag %2d% - OpcodeResult __stdcall opcode_0AC0(CRunningScript *thread) - { - CAudioStream *stream; - DWORD loop; - *thread >> stream >> loop; - if (stream) stream->Loop(loop != false); - return OR_CONTINUE; - } - - //0AC1=2,%2d% = load_audiostream_with_3d_support %1d% //IF and SET - OpcodeResult __stdcall opcode_0AC1(CRunningScript *thread) - { - auto path = OPCODE_READ_PARAM_STRING(); - - auto stream = GetInstance().SoundSystem.LoadStream(path, true); - *thread << stream; - SetScriptCondResult(thread, stream != nullptr); - return OR_CONTINUE; - } - - //0AC2=4,set_3d_audiostream %1d% position %2d% %3d% %4d% - OpcodeResult __stdcall opcode_0AC2(CRunningScript *thread) - { - auto stream = 
(CAudioStream*)OPCODE_READ_PARAM_PTR(); - CVector pos; - pos.x = OPCODE_READ_PARAM_FLOAT(); - pos.y = OPCODE_READ_PARAM_FLOAT(); - pos.z = OPCODE_READ_PARAM_FLOAT(); - - stream->Set3dPosition(pos); - return OR_CONTINUE; - } - - //0AC3=2,link_3d_audiostream %1d% to_object %2d% - OpcodeResult __stdcall opcode_0AC3(CRunningScript *thread) - { - auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); - auto handle = OPCODE_READ_PARAM_OBJECT_HANDLE(); - - auto object = CPools::GetObject(handle); - - stream->Link(object); - return OR_CONTINUE; - } - - //0AC4=2,link_3d_audiostream %1d% to_actor %2d% - OpcodeResult __stdcall opcode_0AC4(CRunningScript *thread) - { - auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); - auto handle = OPCODE_READ_PARAM_PED_HANDLE(); - - auto ped = CPools::GetPed(handle); - - stream->Link(ped); - return OR_CONTINUE; - } - - //0AC5=2,link_3d_audiostream %1d% to_vehicle %2d% - OpcodeResult __stdcall opcode_0AC5(CRunningScript *thread) - { - auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); - auto handle = OPCODE_READ_PARAM_VEHICLE_HANDLE(); - - auto vehicle = CPools::GetVehicle(handle); - - stream->Link(vehicle); - return OR_CONTINUE; - } - //0ACA=1,show_text_box %1d% OpcodeResult __stdcall opcode_0ACA(CRunningScript *thread) { @@ -2199,7 +2020,7 @@ extern "C" CLEO::HSTREAM WINAPI CLEO_GetInternalAudioStream(CLEO::CRunningScript* thread, DWORD stream) // arg CAudioStream * { - return ((CAudioStream*)stream)->GetInternal(); + return stream; // CAudioStream::streamInternal offset is 0 } CLEO::CRunningScript* WINAPI CLEO_CreateCustomScript(CLEO::CRunningScript* fromThread, const char *script_name, int label) diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index adc01de9..8e56a616 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -1,7 +1,6 @@ #pragma once #include "CCodeInjector.h" #include "CDebug.h" -#include "CSoundSystem.h" #include #include diff --git a/source/CDebug.cpp 
b/source/CDebug.cpp index bc2202c9..0ac92941 100644 --- a/source/CDebug.cpp +++ b/source/CDebug.cpp @@ -99,7 +99,7 @@ void CDebug::Error(const char* format, ...) SHQueryUserNotificationState(&pquns); bool fullscreen = (pquns == QUNS_BUSY) || (pquns == QUNS_RUNNING_D3D_FULL_SCREEN) || (pquns == QUNS_PRESENTATION_MODE); - auto mainWnd = GetInstance().MainWnd; + auto mainWnd = RsGlobal.ps->window; if(fullscreen) { diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 5240deca..d2f82ff7 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -983,11 +983,7 @@ namespace CLEO } }); - for (void* func : GetInstance().GetCallbacks(eCallbackId::ScriptsLoaded)) - { - typedef void WINAPI callback(void); - ((callback*)func)(); - } + GetInstance().CallCallbacks(eCallbackId::ScriptsLoaded); TRACE("Scripts search done"); } diff --git a/source/CSoundSystem.cpp b/source/CSoundSystem.cpp deleted file mode 100644 index 1705951a..00000000 --- a/source/CSoundSystem.cpp +++ /dev/null @@ -1,415 +0,0 @@ -#include "stdafx.h" -#include "CSoundSystem.h" -#include "bass.h" -#include "CDebug.h" -#include "CleoBase.h" -#include "Singleton.h" -#include - -namespace CLEO -{ - HWND(__cdecl * CreateMainWindow)(HINSTANCE hinst); - LRESULT(__stdcall * imp_DefWindowProc)(HWND wnd, UINT msg, WPARAM wparam, LPARAM lparam); - - HWND OnCreateMainWindow(HINSTANCE hinst) - { - if (HIWORD(BASS_GetVersion()) != BASSVERSION) LOG_WARNING(0, "An incorrect version of bass.dll has been loaded"); - TRACE("Creating main window..."); - auto mainWnd = CreateMainWindow(hinst); - if (!GetInstance().SoundSystem.Init(mainWnd)) SHOW_ERROR("CSoundSystem::Init() failed. 
Error code: %d", BASS_ErrorGetCode()); - - GetInstance().MainWnd = mainWnd; - return mainWnd; - } - - CPlaceable *camera; - RwCamera ** pRwCamera; - bool * userPaused; - bool * codePaused; - - LRESULT __stdcall HOOK_DefWindowProc(HWND wnd, UINT msg, WPARAM wparam, LPARAM lparam) - { - CleoSingletonCheck(); // check once for CLEO.asi duplicates - - if (GetInstance().SoundSystem.Initialized()) - { - // pause streams if the window loses focus, or if SA found any other reason to pause - if (*codePaused) GetInstance().SoundSystem.PauseStreams(); - else - { - switch (msg) - { - case WM_ACTIVATE: - if (wparam == 0) - { - GetInstance().SoundSystem.PauseStreams(); - } - else if (wparam == 1) - { - GetInstance().SoundSystem.ResumeStreams(); - } - break; - case WM_KILLFOCUS: - GetInstance().SoundSystem.PauseStreams(); - break; - } - } - } - return imp_DefWindowProc(wnd, msg, wparam, lparam); - } - - void CSoundSystem::Inject(CCodeInjector& inj) - { - TRACE("Injecting SoundSystem..."); - CGameVersionManager& gvm = GetInstance().VersionManager; - CreateMainWindow = gvm.TranslateMemoryAddress(MA_CREATE_MAIN_WINDOW_FUNCTION); - inj.ReplaceFunction(OnCreateMainWindow, gvm.TranslateMemoryAddress(MA_CALL_CREATE_MAIN_WINDOW)); - camera = gvm.TranslateMemoryAddress(MA_CAMERA); - userPaused = gvm.TranslateMemoryAddress(MA_USER_PAUSE); - codePaused = gvm.TranslateMemoryAddress(MA_CODE_PAUSE); - pRwCamera = gvm.TranslateMemoryAddress(MA_RW_CAMERA_PP); - auto addr = gvm.TranslateMemoryAddress(MA_DEF_WINDOW_PROC_PTR); - static const auto pWindowProcHook = &HOOK_DefWindowProc; - DWORD ptr; - inj.MemoryRead(addr, ptr); - inj.MemoryRead(ptr, imp_DefWindowProc); - inj.MemoryWrite(addr, (DWORD)&pWindowProcHook); - } - - void EnumerateBassDevices(int& total, int& enabled, int& default_device) - { - BASS_DEVICEINFO info; - for (default_device = -1, enabled = 0, total = 0; BASS_GetDeviceInfo(total, &info); ++total) - { - if (info.flags & BASS_DEVICE_ENABLED) ++enabled; - if (info.flags & 
BASS_DEVICE_DEFAULT) default_device = total; - TRACE("Found sound device %d%s: %s", total, default_device == total ? - " (default)" : "", info.name); - } - } - - BASS_3DVECTOR pos(0, 0, 0), vel(0, 0, 0), front(0, -1.0, 0), top(0, 0, 1.0); - - bool CSoundSystem::Init(HWND hwnd) - { - int default_device, total_devices, enabled_devices; - BASS_DEVICEINFO info = { nullptr, nullptr, 0 }; - EnumerateBassDevices(total_devices, enabled_devices, default_device); - if (forceDevice != -1 && BASS_GetDeviceInfo(forceDevice, &info) && - info.flags & BASS_DEVICE_ENABLED) - default_device = forceDevice; - - TRACE("On system found %d devices, %d enabled devices, assuming device to use: %d (%s)", - total_devices, enabled_devices, default_device, BASS_GetDeviceInfo(default_device, &info) ? - info.name : "Unknown device"); - - if (BASS_Init(default_device, 44100, BASS_DEVICE_3D, hwnd, nullptr) && - BASS_Set3DFactors(1.0f, 0.3f, 1.0f) && - BASS_Set3DPosition(&pos, &vel, &front, &top)) - { - TRACE("SoundSystem initialized"); - - // Can we use floating-point (HQ) audio streams? - DWORD floatable; // floating-point channel support? 0 = no, else yes - if (floatable = BASS_StreamCreate(44100, 1, BASS_SAMPLE_FLOAT, NULL, NULL)) - { - TRACE("Floating-point audio supported!"); - BASS_StreamFree(floatable); - } - else TRACE("Floating-point audio not supported!"); - - // - if (BASS_GetInfo(&SoundDevice)) - { - if (SoundDevice.flags & DSCAPS_EMULDRIVER) - TRACE("Audio drivers not installed - using DirectSound emulation"); - if (!SoundDevice.eax) - TRACE("Audio hardware acceleration disabled (no EAX)"); - } - - initialized = true; - this->hwnd = hwnd; - BASS_Apply3D(); - return true; - } - LOG_WARNING(0, "Could not initialize BASS sound system"); - return false; - } - - CAudioStream *CSoundSystem::LoadStream(const char *filename, bool in3d) - { - CAudioStream *result = in3d ? 
new C3DAudioStream(filename) : new CAudioStream(filename); - if (result->OK) - { - streams.insert(result); - return result; - } - delete result; - return nullptr; - } - - void CSoundSystem::UnloadStream(CAudioStream *stream) - { - if (streams.erase(stream)) - delete stream; - else - TRACE("Unloading of stream that is not in list of loaded streams"); - } - - void CSoundSystem::UnloadAllStreams() - { - std::for_each(streams.begin(), streams.end(), [](CAudioStream *stream) - { - delete stream; - }); - streams.clear(); - } - - void CSoundSystem::ResumeStreams() - { - paused = false; - std::for_each(streams.begin(), streams.end(), [](CAudioStream *stream) { - if (stream->state == CAudioStream::playing) stream->Resume(); - }); - } - - void CSoundSystem::PauseStreams() - { - paused = true; - std::for_each(streams.begin(), streams.end(), [](CAudioStream *stream) { - if (stream->state == CAudioStream::playing) stream->Pause(false); - }); - } - - void CSoundSystem::Update() - { - //// steam has a relocated var, so get it manually for now - //CGameVersionManager& gvm = GetInstance().VersionManager; - //bool bMenuActive = gvm.GetGameVersion() != GV_STEAM ? MenuManager->IsActive() : *((bool*)0xC3315C); - - if (*userPaused || *codePaused) // covers menu pausing, no disc in drive pausing, etc. - { - if (!paused) PauseStreams(); - } - else - { - if (paused) ResumeStreams(); - - // not in menu - // process camera movements - - CMatrixLink * pMatrix = nullptr; - CVector * pVec = nullptr; - if (camera->m_matrix) - { - pMatrix = camera->m_matrix; - pVec = &pMatrix->pos; - } - else pVec = &camera->m_placement.m_vPosn; - - BASS_Set3DPosition( - &BASS_3DVECTOR(pVec->y, pVec->z, pVec->x), - nullptr, - pMatrix ? &BASS_3DVECTOR(pMatrix->at.y, pMatrix->at.z, pMatrix->at.x) : nullptr, - pMatrix ? 
&BASS_3DVECTOR(pMatrix->up.y, pMatrix->up.z, pMatrix->up.x) : nullptr - ); - - // process all streams - std::for_each(streams.begin(), streams.end(), [](CAudioStream *stream) { - stream->Process(); - }); - // apply above changes - BASS_Apply3D(); - } - } - - CAudioStream::CAudioStream() - : streamInternal(0), state(no), OK(false) - { - } - - CAudioStream::CAudioStream(const char *src) : state(no), OK(false) - { - unsigned flags = BASS_SAMPLE_SOFTWARE; - if (GetInstance().SoundSystem.bUseFPAudio) - flags |= BASS_SAMPLE_FLOAT; - if (!(streamInternal = BASS_StreamCreateFile(FALSE, src, 0, 0, flags)) && - !(streamInternal = BASS_StreamCreateURL(src, 0, flags, 0, nullptr))) - { - LOG_WARNING(0, "Loading audiostream %s failed. Error code: %d", src, BASS_ErrorGetCode()); - } - else OK = true; - } - - CAudioStream::~CAudioStream() - { - if (streamInternal) BASS_StreamFree(streamInternal); - } - - C3DAudioStream::C3DAudioStream(const char *src) : CAudioStream(), link(nullptr) - { - unsigned flags = BASS_SAMPLE_3D | BASS_SAMPLE_MONO | BASS_SAMPLE_SOFTWARE; - if (GetInstance().SoundSystem.bUseFPAudio) - flags |= BASS_SAMPLE_FLOAT; - if (!(streamInternal = BASS_StreamCreateFile(FALSE, src, 0, 0, flags)) && - !(streamInternal = BASS_StreamCreateURL(src, 0, flags, nullptr, nullptr))) - { - LOG_WARNING(0, "Loading 3d-audiostream %s failed. 
Error code: %d", src, BASS_ErrorGetCode()); - } - else - { - BASS_ChannelSet3DAttributes(streamInternal, 0, -1.0, -1.0, -1, -1, -1.0); - OK = true; - } - } - - C3DAudioStream::~C3DAudioStream() - { - if (streamInternal) BASS_StreamFree(streamInternal); - } - - void CAudioStream::Play() - { - BASS_ChannelPlay(streamInternal, TRUE); - state = playing; - } - - void CAudioStream::Pause(bool change_state) - { - BASS_ChannelPause(streamInternal); - if (change_state) state = paused; - } - - void CAudioStream::Stop() - { - BASS_ChannelPause(streamInternal); - BASS_ChannelSetPosition(streamInternal, 0, BASS_POS_BYTE); - state = paused; - } - - void CAudioStream::Resume() - { - BASS_ChannelPlay(streamInternal, FALSE); - state = playing; - } - - DWORD CAudioStream::GetLength() - { - return (unsigned)BASS_ChannelBytes2Seconds(streamInternal, - BASS_ChannelGetLength(streamInternal, BASS_POS_BYTE)); - } - - DWORD CAudioStream::GetState() - { - if (state == stopped) return -1; // dont do this in case we changed state by pausing - switch (BASS_ChannelIsActive(streamInternal)) - { - case BASS_ACTIVE_STOPPED: - default: - return -1; - case BASS_ACTIVE_PLAYING: - case BASS_ACTIVE_STALLED: - return 1; - case BASS_ACTIVE_PAUSED: - return 2; - }; - } - - float CAudioStream::GetVolume() - { - float result; - if (!BASS_ChannelGetAttribute(streamInternal, BASS_ATTRIB_VOL, &result)) - return -1.0f; - return result; - } - - void CAudioStream::SetVolume(float val) - { - BASS_ChannelSetAttribute(streamInternal, BASS_ATTRIB_VOL, val); - } - - void CAudioStream::Loop(bool enable) - { - BASS_ChannelFlags(streamInternal, enable ? BASS_SAMPLE_LOOP : 0, BASS_SAMPLE_LOOP); - } - - HSTREAM CAudioStream::GetInternal() - { - return streamInternal; - } - - void CAudioStream::Process() - { - // no actions required // liez! 
- - switch (BASS_ChannelIsActive(streamInternal)) - { - case BASS_ACTIVE_PAUSED: - state = paused; - break; - case BASS_ACTIVE_PLAYING: - case BASS_ACTIVE_STALLED: - state = playing; - break; - case BASS_ACTIVE_STOPPED: - state = stopped; - break; - } - } - - void CAudioStream::Set3dPosition(const CVector& pos) - { - TRACE("Unimplemented CAudioStream::Set3dPosition()"); - } - - void CAudioStream::Link(CPlaceable *placable) - { - TRACE("Unimplemented CAudioStream::Link()"); - } - - void C3DAudioStream::Set3dPosition(const CVector& pos) - { - position.x = pos.y; - position.y = pos.z; - position.z = pos.x; - link = nullptr; - BASS_ChannelSet3DPosition(streamInternal, &position, nullptr, nullptr); - } - - void C3DAudioStream::Link(CPlaceable *placable) - { - link = placable; - //Set3dPosition(placable->GetPos()); - } - - void C3DAudioStream::Process() - { - // update playing position of the linked object - switch (BASS_ChannelIsActive(streamInternal)) - { - case BASS_ACTIVE_PAUSED: - state = paused; - break; - case BASS_ACTIVE_PLAYING: - case BASS_ACTIVE_STALLED: - state = playing; - break; - case BASS_ACTIVE_STOPPED: - state = stopped; - break; - } - if (state == playing) - { - if (link) - { - CVector * pVec = link->m_matrix ? 
&link->m_matrix->pos : &link->m_placement.m_vPosn; - BASS_ChannelSet3DPosition(streamInternal, &BASS_3DVECTOR(pVec->y, pVec->z, pVec->x), nullptr, nullptr); - } - else - { - BASS_ChannelSet3DPosition(streamInternal, &position, nullptr, nullptr); - //BASS_ChannelGet3DPosition(streamInternal, &position, nullptr, nullptr); - } - } - } -} diff --git a/source/CSoundSystem.h b/source/CSoundSystem.h deleted file mode 100644 index 263019dc..00000000 --- a/source/CSoundSystem.h +++ /dev/null @@ -1,115 +0,0 @@ -#pragma once -#include "stdafx.h" -#include "CCodeInjector.h" -#include -#include "bass.h" - -namespace CLEO -{ - class CAudioStream; - class C3DAudioStream; - - class CSoundSystem : VInjectible - { - friend class CAudioStream; - friend class C3DAudioStream; - - std::set streams; - BASS_INFO SoundDevice; - bool initialized; - int forceDevice; - bool paused; - bool bUseFPAudio; - HWND hwnd; - - public: - virtual void Inject(CCodeInjector& inj); - bool Init(HWND hwnd); - inline bool Initialized() { return initialized; } - - CSoundSystem() : initialized(false), forceDevice(-1), paused(false), bUseFPAudio(false) - { - // TODO: give to user an ability to force a sound device to use (ini-file or cmd-line?) 
- - } - - ~CSoundSystem() - { - TRACE("Closing SoundSystem..."); - UnloadAllStreams(); - if (initialized) - { - TRACE("Freeing BASS library"); - BASS_Free(); - initialized = false; - } - TRACE("SoundSystem closed!"); - } - - CAudioStream * LoadStream(const char *filename, bool in3d = false); - void PauseStreams(); - void ResumeStreams(); - void UnloadStream(CAudioStream *stream); - void UnloadAllStreams(); - void Update(); - }; - - class CAudioStream - { - friend class CSoundSystem; - - CAudioStream(const CAudioStream&); - - protected: - HSTREAM streamInternal; - enum eStreamState - { - no, - playing, - paused, - stopped, - } state; - bool OK; - CAudioStream(); - - public: - CAudioStream(const char *src); - virtual ~CAudioStream(); - - // actions on streams - void Play(); - void Pause(bool change_state = true); - void Stop(); - void Resume(); - DWORD GetLength(); - DWORD GetState(); - float GetVolume(); - void SetVolume(float val); - void Loop(bool enable); - HSTREAM GetInternal(); - - // overloadable actions - virtual void Set3dPosition(const CVector& pos); - virtual void Link(CPlaceable *placable = nullptr); - virtual void Process(); - }; - - class C3DAudioStream : public CAudioStream - { - friend class CSoundSystem; - - C3DAudioStream(const C3DAudioStream&); - - protected: - CPlaceable * link; - BASS_3DVECTOR position; - public: - C3DAudioStream(const char *src); - virtual ~C3DAudioStream(); - - // overloaded actions - virtual void Set3dPosition(const CVector& pos); - virtual void Link(CPlaceable *placable = nullptr); - virtual void Process(); - }; -} diff --git a/source/CleoBase.cpp b/source/CleoBase.cpp index 83e96063..c872d51d 100644 --- a/source/CleoBase.cpp +++ b/source/CleoBase.cpp @@ -1,5 +1,6 @@ #include "stdafx.h" #include "CleoBase.h" +#include "Singleton.h" namespace CLEO { @@ -18,13 +19,45 @@ namespace CLEO void __declspec(naked) CCleoInstance::OnUpdateGameLogics() { - //GetInstance().UpdateGameLogics(); // ! 
- GetInstance().SoundSystem.Update(); + CleoInstance.CallCallbacks(eCallbackId::GameProcess); // execute registered callbacks + static DWORD dwFunc; - dwFunc = (DWORD)(GetInstance().UpdateGameLogics); + dwFunc = (DWORD)(CleoInstance.UpdateGameLogics); _asm jmp dwFunc } + HWND CCleoInstance::OnCreateMainWnd(HINSTANCE hinst) + { + CleoSingletonCheck(); // check once for CLEO.asi duplicates + + auto& base = GetInstance(); + auto window = base.CreateMainWnd_Orig(hinst); // call original + + // redirect window handling procedure + *((size_t*)&base.MainWndProc_Orig) = GetWindowLongPtr(window, GWLP_WNDPROC); // store original address + SetWindowLongPtr(window, GWLP_WNDPROC, (LONG_PTR)OnMainWndProc); // LONG_PTR is the pointer-sized type the API expects + + return window; + } + + LRESULT __stdcall CCleoInstance::OnMainWndProc(HWND wnd, UINT msg, WPARAM wparam, LPARAM lparam) + { + auto& base = GetInstance(); + + switch (msg) + { + case WM_ACTIVATE: + base.CallCallbacks(eCallbackId::MainWindowFocus, LOWORD(wparam) != WA_INACTIVE); // low word is the activation state, high word is the minimized flag + break; + + case WM_KILLFOCUS: + base.CallCallbacks(eCallbackId::MainWindowFocus, false); + break; + } + + return base.MainWndProc_Orig(wnd, msg, wparam, lparam); + } + void CCleoInstance::OnScmInit1() { auto& base = GetInstance(); @@ -93,11 +126,12 @@ namespace CLEO GameMenu.Inject(CodeInjector); DmaFix.Inject(CodeInjector); TextManager.Inject(CodeInjector); - SoundSystem.Inject(CodeInjector); OpcodeSystem.Inject(CodeInjector); ScriptEngine.Inject(CodeInjector); - CodeInjector.ReplaceFunction(&OnUpdateGameLogics, VersionManager.TranslateMemoryAddress(MA_CALL_UPDATE_GAME_LOGICS), &UpdateGameLogics); + CodeInjector.ReplaceFunction(OnCreateMainWnd, VersionManager.TranslateMemoryAddress(MA_CALL_CREATE_MAIN_WINDOW), &CreateMainWnd_Orig); + + CodeInjector.ReplaceFunction(OnUpdateGameLogics, VersionManager.TranslateMemoryAddress(MA_CALL_UPDATE_GAME_LOGICS), &UpdateGameLogics); CodeInjector.ReplaceFunction(OnScmInit1, VersionManager.TranslateMemoryAddress(MA_CALL_INIT_SCM1), &ScmInit1_Orig); 
CodeInjector.ReplaceFunction(OnScmInit2, VersionManager.TranslateMemoryAddress(MA_CALL_INIT_SCM2), &ScmInit2_Orig); @@ -132,11 +166,7 @@ namespace CLEO TRACE("Starting new game, save slot: %d", saveSlot); // execute registered callbacks - for (void* func : GetInstance().GetCallbacks(eCallbackId::GameBegin)) - { - typedef void WINAPI callback(DWORD); - ((callback*)func)((DWORD)saveSlot); - } + GetInstance().CallCallbacks(eCallbackId::GameBegin, saveSlot); TextManager.LoadFxts(); } @@ -147,17 +177,9 @@ namespace CLEO m_bGameInProgress = false; TRACE("Ending current game"); - - // execute registered callbacks - for (void* func : GetInstance().GetCallbacks(eCallbackId::GameEnd)) - { - typedef void WINAPI callback(void); - ((callback*)func)(); - } - + GetInstance().CallCallbacks(eCallbackId::GameEnd); // execute registered callbacks ScriptEngine.GameEnd(); OpcodeSystem.FinalizeScriptObjects(); - SoundSystem.UnloadAllStreams(); TextManager.Clear(); saveSlot = -1; @@ -173,6 +195,24 @@ namespace CLEO return m_callbacks[id]; } + void CCleoInstance::CallCallbacks(eCallbackId id) + { + for (void* func : GetInstance().GetCallbacks(id)) + { + typedef void WINAPI callback(void); + ((callback*)func)(); + } + } + + void CCleoInstance::CallCallbacks(eCallbackId id, DWORD arg) + { + for (void* func : GetInstance().GetCallbacks(id)) + { + typedef void WINAPI callback(DWORD); + ((callback*)func)(arg); + } + } + void WINAPI CLEO_RegisterCallback(eCallbackId id, void* func) { GetInstance().AddCallback(id, func); @@ -180,12 +220,7 @@ namespace CLEO void __cdecl CCleoInstance::OnDrawingFinished() { - // execute registered callbacks - for (void* func : GetInstance().GetCallbacks(eCallbackId::DrawingFinished)) - { - typedef void WINAPI callback(void); - ((callback*)func)(); - } + GetInstance().CallCallbacks(eCallbackId::DrawingFinished); // execute registered callbacks } } diff --git a/source/CleoBase.h b/source/CleoBase.h index b449b577..7ffecc72 100644 --- a/source/CleoBase.h +++ 
b/source/CleoBase.h @@ -10,7 +10,6 @@ #include "CScriptEngine.h" #include "CCustomOpcodeSystem.h" #include "CTextManager.h" -#include "CSoundSystem.h" #include "FileEnumerator.h" #include "crc32.h" @@ -31,11 +30,9 @@ namespace CLEO CTextManager TextManager; CCustomOpcodeSystem OpcodeSystem; CModuleSystem ModuleSystem; - CSoundSystem SoundSystem; CPluginSystem PluginSystem; //CLegacy Legacy; - HWND MainWnd = NULL; int saveSlot = -1; // -1 if not loaded from save CCleoInstance(); @@ -51,12 +48,22 @@ namespace CLEO void AddCallback(eCallbackId id, void* func); const std::set& GetCallbacks(eCallbackId id); + void CallCallbacks(eCallbackId id); + void CallCallbacks(eCallbackId id, DWORD arg); static void __cdecl OnDrawingFinished(); void(__cdecl * UpdateGameLogics)() = nullptr; static void __cdecl OnUpdateGameLogics(); + // call for InitInstance + HWND(__cdecl* CreateMainWnd_Orig)(HINSTANCE) = nullptr; + static HWND __cdecl OnCreateMainWnd(HINSTANCE hinst); + + // main window procedure hook + LRESULT(__stdcall* MainWndProc_Orig)(HWND, UINT, WPARAM, LPARAM) = nullptr; + static LRESULT __stdcall OnMainWndProc(HWND, UINT, WPARAM, LPARAM); + // calls to CTheScripts::Init void(__cdecl* ScmInit1_Orig)() = nullptr; void(__cdecl* ScmInit2_Orig)() = nullptr; diff --git a/tests/cleo_tests_runner.txt b/tests/.cleo_tests_runner.txt similarity index 97% rename from tests/cleo_tests_runner.txt rename to tests/.cleo_tests_runner.txt index 404ff27b..9259faaf 100644 --- a/tests/cleo_tests_runner.txt +++ b/tests/.cleo_tests_runner.txt @@ -40,7 +40,7 @@ terminate_this_custom_script does_file_exist 5@ // files only then stream_custom_script 5@ - get_last_created_custom_script 11@ + get_script_struct_just_created 11@ while is_script_running 11@ wait 0 diff --git a/tests/cleo_tests/0AEE.s b/tests/cleo_tests/0AEE.s new file mode 100644 index 0000000000000000000000000000000000000000..2af6eff5891d6815e431811c27407e94e260539c GIT binary patch literal 505 
zcmZ3&%*SHj=<0fyfl-l<%fQjnQ9+|1zg&}nk%5Kb9Tyt|gM$N*XmDU=V7LYpa%KQB zfC7u0d>I(0{{R0Us7}% 1.8 + 0@ < 1.9 +then + trace "~g~~h~~h~0AEE (pow), #1 PASSED" +else + breakpoint "~r~~h~~h~~h~0AEE (pow), #1 FAILED!~n~%f Expected~n~%f Occured" 1.858 0@ +end + + +// perform 3.1415^0.0 +wait 0 +0AEE: pow number 3.1415 power 0.0 result 0@ // tested opcode +if + 0@ == 1.0 +then + trace "~g~~h~~h~0AEE (pow), #2 PASSED" +else + breakpoint "~r~~h~~h~~h~0AEE (pow), #2 FAILED!~n~%f Expected~n~%f Occured" 1.0 0@ +end + + +terminate_this_custom_script diff --git a/tests/cleo_tests/Audio/0AAC.s b/tests/cleo_tests/Audio/0AAC.s new file mode 100644 index 0000000000000000000000000000000000000000..da154071f475d12962aad1c6e1b4caeadb4b55d0 GIT binary patch literal 434 zcmZ3&%*SHj=;(Zyfl-l90>o0#$jMJkiBBv|$;^*0E-6Y)%++LIWME;q2E=Q)_=NOg zGV{t3b23vD@)C1X^>Pc0nHd;-85q6)|NqaR$QNE$R9BY)MYs&qQBXEeaC7wZadlCw z%d7Lt&r?XP$Sf|&%u82D%gjkt0J$MG#a_W9F)t-2)k;A%1!xHa69Xd?&=#OmUFy<_ zu_C}RIM~%iL8CmgBm-zeL27bIYKlU6Vo@H@CKPXR>BYDJ#Sq?E^$%>34lx!PDnKj( lIWPdI4;TzVLhT^N`c7$tz3 z85H@#>x!V7iPG=p=;`C?qF9$#=a!h6lbWJXlCO}HpO~VM2IT1}cqHbfho1>?XtBYb?UY%QFW=?8~LP@?tPJUvFLK={#r{Iy8my(lerJ$MubQ{>! Q>$rd{;+NKY+^Pb^Kz%#SZFDN0Sujn7HVOE1aLWME`qVYmjw zYq4>ZE4F1@ZU1BCFKrK6y1pb+2~9PH}Cz{J2P z3e?S@$QMyp1l3KP0d9_-KCUi`b$NAeiJ3X6DGDX|3OV_SDGF&oo}Pk7VqQv4s+EFj z3eb&U*RSUSvX~jczOV!_SU_&y`Hxh$8zQ+q5vUvJ_5hOHZm588yMcmhML}wENooqv VBdRG1{>jOuMX4!3*D*}y0stUlgUA2? 
literal 0 HcmV?d00001 diff --git a/tests/cleo_tests/Audio/0AAF.txt b/tests/cleo_tests/Audio/0AAF.txt new file mode 100644 index 00000000..f41e81fc --- /dev/null +++ b/tests/cleo_tests/Audio/0AAF.txt @@ -0,0 +1,46 @@ +{$CLEO .s} +{$USE debug} +{$USE memory} +var 0@ : Integer +var 1@ : Integer +var 2@ : Integer +var 3@ : Integer +var 4@ : Integer +var 5@ : Integer +var 6@ : Integer +var 7@ : Integer +var 8@ : Integer +var 9@ : Integer +var 10@ : Integer + +script_name "0AAF" // get_audio_stream_length + +debug_on + +trace "0AAF (get_audio_stream_length)" + + +// load the file +wait 0 +if + load_audio_stream ".\Ding.mp3" store_to 0@ +then + trace "~g~~h~~h~0AAF (get_audio_stream_length), #0 PASSED" +else + breakpoint "~r~~h~~h~~h~0AAF (get_audio_stream_length), #0 FAILED!~n~Failed to load file. Handle: %d" 0@ +end + + +// get stream length +wait 0 +0AAF: get_audio_stream_length 0@ store_to 1@ // tested opcode +if + 1@ == 0 // Ding.mp3 duration is about 0.25 second, opcode returns integer with seconds +then + trace "~g~~h~~h~0AAF (get_audio_stream_length), #1 PASSED" +else + breakpoint "~r~~h~~h~~h~0AAF (get_audio_stream_length), #1 FAILED!~n~0 Expected~n~%d Occured" 1@ +end + + +terminate_this_custom_script diff --git a/tests/cleo_tests/Audio/0AB9.s b/tests/cleo_tests/Audio/0AB9.s new file mode 100644 index 0000000000000000000000000000000000000000..6dd24a656d88ed2e25420cd2950f094b821409f9 GIT binary patch literal 843 zcmZ3&%*SHj=wx}Afl-l97Q|A}NKY+^Pb^Kz%#SZFDN0Su1yYG6shSLo3@i-SfOriT zAD3Q?OJ-iWUT%RgGXsM!17rFB|Nns|7}llN)n$MXUZZprlnoRD9D{>hT^N`c7$tz3 z85H@#>x!V7iPG=p=;`C?qF9$#=a!h6lbWJXlCO}HpO~VM2IT1}cqHbfb)NOZX&lFO5Unt?74AjRc|3J900rYN{p6r?7Xq^1D< z0p$57Czlqbrm*}6IuGpVwID~cFrtO6qlU*T?^o=xbw` qnZWqU1!^YM*T%&7ni1k_Rvku6SN{jPT96V~Z~sFhSA*kfG8X`OGzX9X literal 0 HcmV?d00001 diff --git a/tests/cleo_tests/Audio/0AB9.txt b/tests/cleo_tests/Audio/0AB9.txt new file mode 100644 index 00000000..701c450b --- /dev/null +++ b/tests/cleo_tests/Audio/0AB9.txt @@ -0,0 +1,76 @@ +{$CLEO 
.s} +{$USE debug} +{$USE memory} +var 0@ : Integer +var 1@ : Integer +var 2@ : Integer +var 3@ : Integer +var 4@ : Integer +var 5@ : Integer +var 6@ : Integer +var 7@ : Integer +var 8@ : Integer +var 9@ : Integer +var 10@ : Integer + +script_name "0AB9" // get_audio_stream_state + +debug_on + +trace "0AB9 (get_audio_stream_state)" + + +// load the file +wait 0 +if + load_audio_stream ".\Ding.mp3" store_to 0@ +then + trace "~g~~h~~h~0AB9 (get_audio_stream_state), #0 PASSED" +else + breakpoint "~r~~h~~h~~h~0AB9 (get_audio_stream_state), #0 FAILED!~n~Failed to load file. Handle: %d" 0@ +end + + +// get state +wait 0 +0AB9: get_audio_stream_state 0@ store_to 1@ // tested opcode +if + 1@ == 2 // paused by default +then + trace "~g~~h~~h~0AB9 (get_audio_stream_state), #1 PASSED" +else + breakpoint "~r~~h~~h~~h~0AB9 (get_audio_stream_state), #1 FAILED!~n~%d Expected~n~%d Occured" -1 1@ +end + + +// set new state +wait 0 +set_audio_stream_state 0@ state AudioStreamState.Play +trace "~g~~h~~h~0AAD (set_audio_stream_state), #2 PASSED" + + +// get updated state +wait 0 +0AB9: get_audio_stream_state 0@ store_to 1@ // tested opcode +if + 1@ == 1 // play +then + trace "~g~~h~~h~0AB9 (get_audio_stream_state), #3 PASSED" +else + breakpoint "~r~~h~~h~~h~0AB9 (get_audio_stream_state), #3 FAILED!~n~%d Expected~n~%d Occured" 1 1@ +end + + +// check if state updated after playback end +wait 300 // Ding.mp3 is 0.25s long +0AB9: get_audio_stream_state 0@ store_to 1@ // tested opcode +if + 1@ == -1 // AudioStreamState.Stop +then + trace "~g~~h~~h~0AB9 (get_audio_stream_state), #3 PASSED" +else + breakpoint "~r~~h~~h~~h~0AB9 (get_audio_stream_state), #3 FAILED!~n~%d Expected~n~%d Occured" -1 1@ +end + + +terminate_this_custom_script diff --git a/tests/cleo_tests/Audio/0ABB.s b/tests/cleo_tests/Audio/0ABB.s new file mode 100644 index 0000000000000000000000000000000000000000..2fb78156abf693ffbcf645915206b771dc7ed1e2 GIT binary patch literal 426 
zcmZ3&%*SHj=;U;mfl-l94#ZN>NKY+^Pb^Kz%#SZFDN0SujW5g3Da}pQWME`qVYmjw zYq4>ZE4F1@ZU1BCFKrK6y1pb+2~9PH}Cz{J2P z3e?S@$QMyp1l3KP0d9_-KCUi`b$NAeiJ3X6DGDX|3OV_SDGF&oo}Pk7VqQv4s+EFj z3eb&U*YD;6vX~jczHkOH*cccZ?19eT_K#HO8zMPB0;n74{6LbNZ>WH9zG|9+Yehk7 ba!G0m&@(`ue{ynZQECdC{8VjPb^Kz%#SZFDN0SujW5g3Da}pQWME`qVYmjw zYq4>ZE4F1@ZU1BCFKrK6y1pb+2~9PH}Cz{J2P z3e?S@$QMyp1l3KP0d9_-KCUi`b$NAeiJ3X6DGDX|3OV_SDGF&oo}Pk7VqQv4s+EFj z3eb&U*YD;6vX~jczHkOH*cccZ?19eT_K#HO8zMPB0;n74{6LbNZ>WH9zG|9+Yehk7 za!G0m&@(`ue{ynZQECdG6d{EAuyQ>0AJ12rvLx| literal 0 HcmV?d00001 diff --git a/tests/cleo_tests/Audio/0ABC.txt b/tests/cleo_tests/Audio/0ABC.txt new file mode 100644 index 00000000..c55de2ab --- /dev/null +++ b/tests/cleo_tests/Audio/0ABC.txt @@ -0,0 +1,64 @@ +{$CLEO .s} +{$USE debug} +{$USE memory} +var 0@ : Integer +var 1@ : Integer +var 2@ : Integer +var 3@ : Integer +var 4@ : Integer +var 5@ : Integer +var 6@ : Integer +var 7@ : Integer +var 8@ : Integer +var 9@ : Integer +var 10@ : Integer + +script_name "0ABC" // set_audio_stream_volume + +debug_on + +trace "0ABC (set_audio_stream_volume)" + + +// load the file +wait 0 +if + load_audio_stream ".\Ding.mp3" store_to 0@ +then + trace "~g~~h~~h~0ABC (set_audio_stream_volume), #0 PASSED" +else + breakpoint "~r~~h~~h~~h~0ABC (set_audio_stream_volume), #0 FAILED!~n~Failed to load file. 
Handle: %d" 0@ +end + + +// get stream volume +wait 0 +get_audio_stream_volume 0@ store_to 1@ +if + 1@ == 1.0 // default volume +then + trace "~g~~h~~h~0ABC (set_audio_stream_volume), #1 PASSED" +else + breakpoint "~r~~h~~h~~h~0ABC (set_audio_stream_volume), #1 FAILED!~n~%f Expected~n~%f Occured" 1.0 1@ +end + + +// set stream volume +wait 0 +0ABC: set_audio_stream_volume 0@ volume 0.25 // tested opcode +trace "~g~~h~~h~0ABC (set_audio_stream_volume), #2 PASSED" + + +// get updated volume +wait 0 +get_audio_stream_volume 0@ store_to 1@ +if + 1@ == 0.25 +then + trace "~g~~h~~h~0ABC (set_audio_stream_volume), #3 PASSED" +else + breakpoint "~r~~h~~h~~h~0ABC (set_audio_stream_volume), #3 FAILED!~n~%f Expected~n~%f Occured" 0.25 1@ +end + + +terminate_this_custom_script diff --git a/tests/cleo_tests/Audio/0AC0.s b/tests/cleo_tests/Audio/0AC0.s new file mode 100644 index 0000000000000000000000000000000000000000..704ce66d5d0f5a290449e4d6a341db3532856fb6 GIT binary patch literal 802 zcmZ3&%*SHj=xlJ9fl-l94#ZN>C{8VjPb^Kz%#SZFDN0SujnB!?FGx+%WME`qVYmjw zYq4>ZE4F1@ZU1BCFKrK6y1pb+2~9PH}Cz{J2P z3e?S@$QMyp1l3KP0d9_-KCUi`b$NAeiJ3X6DGDX|3OV_SDGF&oo}Pk7VqQv4s+EFj z3eb&U*B{^lvRD{NbEqNAp&2O43xY{NSmfGH%{gP0Bvse5&!@I literal 0 HcmV?d00001 diff --git a/tests/cleo_tests/Audio/0AC0.txt b/tests/cleo_tests/Audio/0AC0.txt new file mode 100644 index 00000000..d72caf20 --- /dev/null +++ b/tests/cleo_tests/Audio/0AC0.txt @@ -0,0 +1,76 @@ +{$CLEO .s} +{$USE debug} +{$USE memory} +var 0@ : Integer +var 1@ : Integer +var 2@ : Integer +var 3@ : Integer +var 4@ : Integer +var 5@ : Integer +var 6@ : Integer +var 7@ : Integer +var 8@ : Integer +var 9@ : Integer +var 10@ : Integer + +script_name "0AC0" // set_audio_stream_looped + +debug_on + +trace "0AC0 (set_audio_stream_looped)" + + +// load the file +wait 0 +if + load_audio_stream ".\Ding.mp3" store_to 0@ +then + trace "~g~~h~~h~0AC0 (set_audio_stream_looped), #0 PASSED" +else + breakpoint "~r~~h~~h~~h~0AC0 (set_audio_stream_looped), #0 
FAILED!~n~Failed to load file. Handle: %d" 0@ +end + + +// enable looping +wait 0 +0AC0: set_audio_stream_looped 0@ flag true // tested opcode +trace "~g~~h~~h~0AC0 (set_audio_stream_looped), #1 PASSED" + + +// start playback +wait 0 +set_audio_stream_state 0@ state AudioStreamState.Play +trace "~g~~h~~h~0AC0 (set_audio_stream_looped), #2 PASSED" + + +// get updated state +wait 0 +get_audio_stream_state 0@ store_to 1@ +if + 1@ == 1 // play +then + trace "~g~~h~~h~0AC0 (set_audio_stream_looped), #3 PASSED" +else + breakpoint "~r~~h~~h~~h~0AC0 (set_audio_stream_looped), #3 FAILED!~n~%d Expected~n~%d Occured" 1 1@ +end + + +// check if still playing +wait 400 // Ding.mp3 is 0.25s long +get_audio_stream_state 0@ store_to 1@ +if + 1@ == 1 // play +then + trace "~g~~h~~h~0AC0 (set_audio_stream_looped), #4 PASSED" +else + breakpoint "~r~~h~~h~~h~0AC0 (set_audio_stream_looped), #4 FAILED!~n~%d Expected~n~%d Occured" -1 1@ +end + + +// stop playback +wait 0 +set_audio_stream_state 0@ state AudioStreamState.Stop +trace "~g~~h~~h~0AC0 (set_audio_stream_looped), #5 PASSED" + + +terminate_this_custom_script diff --git a/tests/cleo_tests/Audio/0AC1.s b/tests/cleo_tests/Audio/0AC1.s new file mode 100644 index 0000000000000000000000000000000000000000..22bc4b7fd8babbbdc54e1e844daf6889f88b08e2 GIT binary patch literal 449 zcmZ3&%*SHj=xlhHfl-l98pKl2$jMJki8oG(Pb^Kz%#SZFDN0Su)ns5~U}3lh#0R|U2nq%8rC!NIOB3L52^B^f|V3R06xQd1Pl6N~bImZ5l#OE1O+D2DLVJcwQTMA~Jj p0I>_?Oh=$rV0eTQA0CEC;o+7D41*Mfl6-JDfI~zNEhr{)0Ra0NkA?sM literal 0 HcmV?d00001 diff --git a/tests/cleo_tests/Audio/0AC1.txt b/tests/cleo_tests/Audio/0AC1.txt new file mode 100644 index 00000000..7ea96a2f --- /dev/null +++ b/tests/cleo_tests/Audio/0AC1.txt @@ -0,0 +1,44 @@ +{$CLEO .s} +{$USE debug} +{$USE memory} +var 0@ : Integer +var 1@ : Integer +var 2@ : Integer +var 3@ : Integer +var 4@ : Integer +var 5@ : Integer +var 6@ : Integer +var 7@ : Integer +var 8@ : Integer +var 9@ : Integer +var 10@ : Integer + +script_name "0AC1" // 
load_3d_audio_stream +debug_on + +trace "0AC1 (load_3d_audio_stream)" + + +// load not existing file +wait 0 +if + 0AC1: load_3d_audio_stream ".\invalid name.mp3" store_to 0@ // tested opcode +then + breakpoint "~r~~h~~h~~h~0AC1 (load_3d_audio_stream), #0 FAILED!~n~Non existing file loaded? Handle: %d" 0@ +else + trace "~g~~h~~h~0AC1 (load_3d_audio_stream), #0 PASSED (with expected warning)" +end + + +// load existing file +wait 0 +if + load_3d_audio_stream ".\Ding.mp3" store_to 0@ // tested opcode +then + trace "~g~~h~~h~0AC1 (load_3d_audio_stream), #1 PASSED" +else + breakpoint "~r~~h~~h~~h~0AC1 (load_3d_audio_stream), #1 FAILED!~n~Failed to load file. Handle: %d" 0@ +end + + +terminate_this_custom_script diff --git a/tests/cleo_tests/Audio/0AC2.s b/tests/cleo_tests/Audio/0AC2.s new file mode 100644 index 0000000000000000000000000000000000000000..9d9f7a4547059055ba65d99987027e1f91d2febe GIT binary patch literal 1099 zcmdUt!Aiq07{|Y?orfZQ0)-S2oQ$!po&*oYZEi4OxE+NMnz$9ZX0#FX(wFey3wZSn zyiC1&^GQ5;uqNncF9WT3FeKrh%m4d+|1UR&ZPvZR<^v$du4sjK5-!Osq?g2Hgr2iN zkVM9uP6?I77b0d!1rV6U_yG zEtl3c$F(Y7n4^*7*!|$(ML8Pyi({Y$!(;4EXiR;n6caqbk&u{Q1c^+X1<9kSDAoY* zTwS0QfPug$fnn%j7aLD=fu%pdy`_r%-qX!{K3Vogzug&;6{}oDrU`etvWD|+wK5Tc e@@sLby?h7D;Jp79+~$hR$Vu<0JNnz?uB>S6ov1!$tFmi;RX>EN@yEbx(ZE7g$ki{BM8Hg8QVa9N-~0O@)EAyx%DAL z_wIZaabcW6qb@aYFPzz&bH2F%cYWI!`jdl4AdbDO74B6+(oM>)smCe1 zCTYB8hjCxpH5T%cpe5$o!3*Aq=KU?FfACz3-xkw#&VvDL)_u2^*2vu_O>*`tqSkR zF#5$X{$z-|F^uqsAyVfMCFpRN literal 0 HcmV?d00001 diff --git a/tests/cleo_tests/Audio/0AC4.txt b/tests/cleo_tests/Audio/0AC4.txt new file mode 100644 index 00000000..6c757c0b --- /dev/null +++ b/tests/cleo_tests/Audio/0AC4.txt @@ -0,0 +1,61 @@ +{$CLEO .cs} +nop + +{$CLEO .s} +{$USE debug} +{$USE memory} +var 0@ : Integer +var 1@ : Integer +var 2@ : Integer +var 3@ : Integer +var 4@ : Integer +var 5@ : Integer +var 6@ : Integer +var 7@ : Integer +var 8@ : Integer +var 9@ : Integer +var 10@ : Integer + +script_name "0AC4" // set_play_3d_audio_stream_at_char 
+debug_on + +trace "0AC4 (set_play_3d_audio_stream_at_char)" + + +// load existing file +wait 0 +if + load_3d_audio_stream ".\Ding.mp3" store_to 0@ +then + trace "~g~~h~~h~0AC4 (set_play_3d_audio_stream_at_char), #0 PASSED" +else + breakpoint "~r~~h~~h~~h~0AC4 (set_play_3d_audio_stream_at_char), #0 FAILED!~n~Failed to load file. Handle: %d" 0@ +end + + +// test player's actor +wait 0 +if + does_char_exist $scplayer +then + trace "~g~~h~~h~0AC4 (set_play_3d_audio_stream_at_char), #1 PASSED" +else + breakpoint "~r~~h~~h~~h~0AC4 (set_play_3d_audio_stream_at_char), #1 FAILED!~n~Player character does not exists!" 0@ +end + + +// attach to character +wait 0 +0AC4: set_play_3d_audio_stream_at_char 0@ character $scplayer // tested opcode +trace "~g~~h~~h~0AC4 (set_play_3d_audio_stream_at_char), #2 PASSED" + + +// play the sound +wait 0 +print_big_formatted "AT PLAYER CHAR" {time} 300 {style} TextStyle.MiddleSmaller +set_audio_stream_state 0@ state AudioStreamState.Play +wait 250 +trace "~g~~h~~h~0AC4 (set_play_3d_audio_stream_at_char), #3 PASSED" + + +terminate_this_custom_script diff --git a/tests/cleo_tests/Audio/Ding.mp3 b/tests/cleo_tests/Audio/Ding.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..f2fdb63356df93c127fbc3a3311eeab579c80bc1 GIT binary patch literal 3311 zcmd^?`8U+xAIIOr45JxK_!L778I5HaQMNRbY$ID`iIim$BBY4)(J+I_l57)Y8DX-M z5-DWMp5=o!TZoD*MV1-fU%uz~p7Z_w3*W~%_uO;u{o%gO^R?X91=;tTf&b;4r;q#g zp5XRC0043Ukl00Ym$SS0>~e1x)-Er0>E7k@E`N3bciFy6+w6q7p}LA19?!+tHAz%U z4l1b+09reC>_zgl>>m8T%pluu+d;f$B4-8?DBN(>xq3AU08oG|m{?2Wc&hh@OhM{2 zlY)kGkFOmUoWkD`Swjm_$JYoyc$+o|9g>^Q<7hEXb|Zya4{kL4{(TgHqM`6;IrZfU zGtnuAfaKJwr{)K$o61RDT*!Tm$oaNmf2Kil%o66_Y#8OJi-opD2$GuR7Jk0c7JgvE zL{a_VNH(bqWbM@t&LzHP#mk;=hrA@s#eeQBb9BAK?{)!Lgq4IQ=^u(G*ooXv;>BttEAOMPasaq722S>X zU}M&Cmy(;3i%-3_a$yhdtUmDD^aT|Q{r>HGF-ZQbb`&}r4xvP$Hk}8t--N}Ke!?ln zG|L9KBDA4lM>vtNq!eyaZ7qdM{~)%YwZ{Q%Bz4Ys)T?p%tO#eo73C&FY&a6Wg* z+gyIEFqB+Cq*ibmT?+V!@&`7rgDbT&LsJ!wskcJvcjEM3-94}9;krv&pN4d&@tq+D 
zLGb7Kij=Uco<5&n4yzXhmSL*Cj}5yA7`gCX7Uq@6Ofaegx41%>dw{oL<+*_kOc@F( ze&VtbBqY8YGZ9}@|2JY*J{&puo;j z{o&Q%DVt<}!$2ErS4|+6$Q#8tmN+(d@zl!T4JZJZFr@~(zG*EyX>jjai#qH9Re>o2 zBXn5Gj&K4Lk(c6z7RS?s@mZb<&k3gcmS^a^EaGdDQDo|%I1>KG%`tWT$%}LYG?I^e z){##5Sj!!D_9!$R#Spyak<;;6ERPeX8A*oDKgDD@5f)}g1a$)P^SFwnXKW*EWK8bG zIE=G(&??B%G{;heYg(uVCdZ)qvFMV?VFg(UEwMpo$>+Xe60&8#>7h1b;Ff7GkqGcY zOYMSCMZG<3kXVWPevmbJn6Wmi@$j)5q!FgSLq+R#Qe zrXoBfsP|9=Fe{D$kIvYWLnzYI9HZQ&_FdVSSR;X8kN*_Gk&fw{ql!85=3LLaUEGk*^o?7OJ48Y~^H6$OG!D`0f6HeA=6$c516 zR({7UhabBkKTa&5xnzGFxLJaBb-`D8Qi{d*9Ll2Fx~e_I=xkdfl*2x9YT6>NU4cO2ezkr-R7 z52ZXFB%8?KHA)1eTz)usu6;#b4LX$Y2(5UcvR%Oimc62E-ctNw{Zp7-Ii^ZC#%qmW z8?&?^R=Qc8Qtd>Td@zI>mjMKkw6( z2vgle2-#f5cO>~8PNDN%f-@aLb1^&WQXG93mFdj%{(dlw`WyGFi-@t3f2H^&rT+_z zDwLl@8g(drU*Kjyxh^e~E`EL95<-?zeF3)CPT!KdwD0x;Aq3p~%Gv*q)tsJWz}C{E z*Fk2ZJu6#BGG=b95B=IQNsHGLbE>zN3CGGL*pCt){uxDE8YeTh(%vcL&vB`B8o~zg z6e0k?@iMiz!}BMK0c&VyIg#|8MXT!foHiyOe`5OmRQO@JTgH?bmY2C5?tT6m$aZcP z2ZZJ}NhOu3eJfc70aZnCsznp`MA{e9ty)~U^V0-az34J8XYx$l2Kp#`3L`HLO3Grx)i;HWu{3SV75`cd0Ss2Q)>pC8 zdn8#%Txb8Bo#vqTgBaf|uiD$|rs-0OtDnm(12ap1_;FH_)-;80-kNW#iQP%T!QtB_{Y`@d;xOAzu-3So)vGpq#J#O)e~ zNM_sp)Cf31lm2=8cf~{ttGL1#vUvuh60EB`qaO{@hz3^&!MnGYa~_Pa%UR}0->uJn@POd#~Af@PZ+ zgA1Rqs=b(YZfTJ1!IkVv%v5m+6pX0uubOMK&c2jWQsj{fKR5%pbL{HTo|nOt;!yn|j}+}p%7Yg{7DrbH9_d?9ElJPTaPx$N;>=~t$| z-zkIG_1ksV^YsXN;8`mzkd1Ff0ejGFa`6$71QV&*fE3h(FSx-j+N$A9q}x8Oe--JH zoA^AzPf%`LG$VNmj<<(`0s!#1P~RPSmF^`XF*4H=M&#pcJbbvtqEr$g&yL=a1FMw zP_ERJ)EwUXi1W+~J1+ndfPdS(`n(NLhiA=bYQF)XS2~*~5X*6!0w>LDI;*xy4E^o% z{|>D(^dcLBC7U*y4#hRt;y#W3K>wwLwD8I8gxaEPa$J8Oq93kw4y1CZXwu#t;# z;s5{tS(x}(lYu&|__#n+dpozjEa0>b>(#+<1_Qp6>{@aN^?>b z)K!bsnHd=vPH?fZFfy|;T;>82eB7$VF)<*lrB}?%zyMOf#=y+Rzyegq;=s(#a1F?O z22u(X1af^D7{$U2v<;0QCbE9feSKK(#NpfRaF=ML;2-KZWXw>iqH*z~P{f zmS38eq6qaEC(LbHsNO=e4ApBV{^IpoFU+CL93Yo6SvCL}EP7yyRZs5;)Ym`Zrm#UA r0d)a95<#H}3@J3HAo=?W&;+0-&qAnVpkKlMJ_)h_NU$?(;$i>*p8Sgw diff --git a/tests/test_file_read_write.txt b/tests/test_file_read_write.txt deleted file mode 100644 index 
65c77cf0..00000000 --- a/tests/test_file_read_write.txt +++ /dev/null @@ -1,205 +0,0 @@ -{$CLEO .cs} -{$USE file} -{$USE debug} - -debug_on -wait 3000 - -var 5@ : Integer -var 6@ : Integer - -copy_file "cleo\.cleo.log" {to} "cleo\.cleo_test.log" -while true - if - // test 0A9A - 0@ = open_file "cleo\.cleo_test.log" {mode} "r+" // read and write - then - print_formatted_now "0A9A File opened" time 1000 - wait 1000 - - // test 0A9C - 1@ = get_file_size 0@ - print_formatted_now "0A9C File size: %d" time 2000 1@ - wait 2000 - - // test 0A9D - 5@ = 0xCCCCCCCC - 0A9D: readfile 0@ size 2 to 5@ - print_formatted_now "0A9D Read WORD %x" time 2000 5@ - wait 2000 - - // test 0A9E - 5@ = 0xAABBCCDD - 0A9E: write_file 0@ size 2 from 5@ - 0AD5: file 0@ seek -2 from_origin SeekOrigin.Current //IF and SET - 5@ = 0 - 0A9D: readfile 0@ size 2 to 5@ - if - 5@ == 0xCCDD - then - print_formatted_now "0A9E ok" time 1000 - wait 1000 - else - print_formatted_now "~r~0A9E failed~n~read: 0x%X, expected: 0xCCDD" time 5000 5@ - wait 5000 - end - - // test 0AD5 - 0A9D: readfile 0@ size 4 to 5@ - if - 0AD5: file 0@ seek -2 from_origin SeekOrigin.Current //IF and SET - then - 0A9D: readfile 0@ size 4 to 6@ - if - 5@ <> 6@ - then - print_formatted_now "0AD5 ok" time 1000 - wait 1000 - else - print_formatted_now "~r~0AD5 invalid result" time 5000 - wait 5000 - end - else - print_formatted_now "~r~0AD5 seek back failed" time 5000 - wait 5000 - end - - // test 0AD6 - if - not is_end_of_file_reached 0@ - then - print_formatted_now "0AD6: not EOF yet" time 1000 - wait 1000 - else - print_formatted_now "~r~0AD6: EOF reached" time 5000 - wait 5000 - end - - // test 0AD7 - 0AD5: file 0@ seek 30 from_origin SeekOrigin.Current - if - 0AD7: read_string_from_file 0@ to 1@v size 15 - then - 0ACE: print_help_formatted "0AD7 read string" - print_formatted_now "Read: %s" time 2000 1@v - wait 2000 - else - print_formatted_now "~r~0AD7 failed" time 5000 - wait 5000 - end - - // test 0AD8 - if - 0AD8: 
write_string_to_file 0@ {text} "test text" - then - 0AD5: file 0@ seek -9 from_origin SeekOrigin.Current - 0AD7: read_string_from_file 0@ to 1@v size 10 - - if - 1@v == "test text" - then - print_formatted_now "0AD8 ok" time 1000 - wait 1000 - else - print_formatted_now "~r~0AD8 invalid result~n~%s" time 5000 1@v - wait 5000 - end - else - print_formatted_now "~r~0AD8 failed to write" time 5000 - wait 5000 - end - - // test 0AD9 - 0AD9: write_formatted_string_to_file 0@ {format} "%x%X%s" {args} 0xA 0xB "CD" - 0AD5: file 0@ seek -4 from_origin SeekOrigin.Current - 0AD7: read_string_from_file 0@ to 1@v size 5 - if - 1@v == "aBCD" - then - print_formatted_now "0AD9 ok" time 1000 - wait 1000 - else - print_formatted_now "~r~0AD9 invalid result~n~%s" time 5000 1@v - wait 5000 - end - - // test 0ADA - 0AD8: write_string_to_file 0@ {text} "5:17 3.1415 END" - 0AD5: file 0@ seek -15 from_origin SeekOrigin.Current - if - 0ADA: scan_file 0@ {format} "%d:%d %f" {nValues} 5@ {values} 6@ 7@ 8@ - then - if and - 5@ == 3 - 6@ == 5 - 7@ == 17 - 8@ == 3.1415 - then - 0AD7: read_string_from_file 0@ to 1@v size 5 - if - 1@v == " END" - then - print_formatted_now "0ADA ok" time 1000 - wait 1000 - else - print_formatted_now "~r~0ADA post check fail~n~%s" time 5000 1@v - wait 5000 - end - else - print_formatted_now "~r~0ADA invalid result~n~%d %d %d %f" time 5000 5@ 6@ 7@ 8@ - wait 5000 - end - else - print_formatted_now "~r~0ADA failed. Read args: %d" time 5000 5@ - wait 5000 - end - - // test 2300 - 2300: get_file_position 0@ {store_to} 5@ - 0AD8: write_string_to_file 0@ {text} "abc" - 2300: get_file_position 0@ {store_to} 6@ - 6@ -= 5@ - if - 6@ == 3 - then - print_formatted_now "2300 ok" time 1000 - wait 1000 - else - print_formatted_now "~r~2300 failed. 
Difference: %d" time 1000 6@ - wait 1000 - end - - // test 2301 - 0AD8: write_string_to_file 0@ {text} "test text" - 0AD5: file 0@ seek -9 from_origin SeekOrigin.Current - 1@ = 0 - 2@ = 0 - 3@ = 0 - 4@ = 0 - 5@ = get_var_pointer 1@ - if - 2301: read_block_from_file 0@ {size} 9 {buffer} 5@ - then - if - 1@v == "test text" - then - print_formatted_now "2301 ok" time 1000 - wait 1000 - else - print_formatted_now "~r~2301 invalid result~n~%s" time 5000 1@v - wait 5000 - end - else - print_formatted_now "~r~2301 failed to read" time 5000 - wait 5000 - end - - // test 0A9B - 0A9B: close_file 0@ - else - print_formatted_now "Failed to open the file" time 5000 - end - - print_formatted_now "Finished testing file read write opcodes" time 5000 - wait 5000 -end diff --git a/third-party/bass/bass.lib b/third-party/bass/bass.lib deleted file mode 100644 index c69024ab53aa77394984519beea8c6bd4f084700..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 25944 zcmdsAON?B_)jqbtI1T~FIAAcC!5H&O%sjfs;|F%n(=+zWxIGVh{G92TZrh!VXGT3e zC}cqpVije<0!1tmEBY5vRw!b@B8vz`S^bI-LQz%`3oDAEtWcB{LP@@=x>fhocVp;O0}h1CwIQqVgGgY_U60txvsuWyIlY5?dk1NoQ0i4 z^k<^(A)?$OQT}H{<*$jZzols6*F=J@|3lH_9wI@-V~WN;B@%RQLDBi$M1szKrD%SR zNYLcJ6rKAFa-fCIgg(F(H1#`0=YIr!Q2BjD69wRd=Kr9m_$%OouH8{I4qQRkA5k<8 z+k%dKp=f4}NYFIwNjiipXoeJB`74p2O9Kggge&Okl%gvSfG?=7=<4rL4!XQQq4#hF zUF%k~@Nd)yR9aEAc$r90>8_&1KjJrNsV|}5;0h{tD4GJUp!0uIH2Yg3K^NauH1iic zH|XM*ie^W_2VLk?ba5BTL34|WF8mVZpt(OO>VvGH!5xZ@!;YZg&lH_PI|@1heL*Mh zDC+wJdZ1JPRCKhHNKoNZMWgQ!NrJ6WP+>vQ*md{^XsD*>*n32RM&WmYPQ9&Y=4L1(}dboM8T&VVOq67?5!WKmJ?U83H*^Ydd9 z#dA|L^FzbagHvP0nR8R~i>38-nxCJX9Uu3(^kczR%$d>I$+PoA{WJX#uijpppP#4s zf&S^~`RPi1aCPNI^=6?@v;1;da@t7dEJ>@})-Bd+S!NTKZCMm%7*AMQ{z9d;UR_-& zbh?ki7*ck)R$HyfM=VzyX_uvK4^`f%E>*@>Zmc>F4>LATYK*N^>xFLnK}?yH7^ziU zY+WSz=mTZfsk~KgKb=z*^JJo2OUfE}&8bTja=lcmJGQ2ESt7Bz<_V}G$uE{R*4@gP zGAS{(E-hBdjv1?5O3tp8OZAG=2x*~|5nWlFbxfs&QU>jIVRd=qwrk2N*VYnAyZ>M? 
zE!Q@ChDJ(D_0`&Xp{v_+JJ_{(+O}P)240w#ViZ@`t99YpUA;bt#Y`<(#V(A+lsf%u zYs+u;3^|R+N~KhBdE;hvWqh?%c7}n?V6xJwZHH5`n!#j;S4xY^m7wL6#pKZ=+H+Y2 zm3hXK8q=lQYs(e+*&7Qte<$NxI9l|qzX@TJ%wo*zNa}mHSW0#Q`BrvPgtB?uhhIj&Xg?~W?bJhXZLPrkX>>Ht)mYo8PnNuel&LWRrYmO;u-ui>)NE`;bD0)urpj$Mwb8`< z!E{u#;!N8Kj0|K1OUfq`*T9?g%6j)S++wBdt>l?9+(M$;EiJhf4yF-E^Z+B33q`f!u8=Jo9TNZTz`7GTCOk) zcFe1r#uwr?@FD?$>Wcdcn`m2XuE#4j{6lvs*4V6qi4eEJE0<{_AjECpIeLan8?3vL zNcte26t@BpkMDbYNQzsL&tk#_=M{uMi6z4=NCbN^qVV7 zh3?>UaxT}Js@z_Eqaqc94P*1UCM=FuS2Ct*#^gH6TDF?a=bA7$G+oRd$C}A?lr_PL zR4rk1Jz$N@Wlxw1k83C-Fd{&TE44=E08uQ}5Pa2<>vFUL+(b)wX||TfWmvW9YOPv# z*FJ^Bc!|bTWxY~&{C#B3lu9Y#hSRGXE9G;G->)pW(mwZ;)Iz<7`{!6C3YSigj!sNW z6fP$Tkz+wuqKibIjv?cJj%fFJqOZ;(%RW!^FYrD?Stqi}AAo*0Mf4-^--nF?^#1^v zUqScIHRz5LJpy~5;rhi9qO}>wO%ojgiEWC%UO|TX646JXsjEZ};CCJLd))88O!OY8 z8+QH;oRt!GBanf4Cq1>9dpd(@DH zfA1L4PL#b3xxeH0cG!6v_U=IED_r-2|2ga(I8F2lY!1Cn^fBly$bEpa_Xkkt5j;Dt z3&20ok9NTC4{_a#c0O^2=qI>-i0eCO??trT-CntwMYn00o}mJrrZKueb5uk_UZ(*X zrb~2{uF(}5p&^>0QJSDLRHhrWNDI_YvviTp(KId5AXTVDa9%o3XK9=!X(!#H@6%1H z(zoa-dYT@m?X-h_K#$TlX%~Hi_R_<&hw_x8$EbszrRQir_0civrQ>vxPS6oLO0Ux? 
zdY%r_%k&Bzq8I4^y+AL~tMnx8qX+3BdW4>!Z_{r2E`5()qr=omUDQoI^c~tlTmK){ z>H*qDjSc(Fr;m9#5xEJ`WO4OhAf-wf5<}HYJmlDk1T?0uSRoy)&}$=qspaCC@qXytF!&l3deO521rEp8 zb-!H4F+NAfQ=Aje8en0JI*jC-Msytp+-!&8%z(py9SWXAvC3aLJqAV~0zzr1IZzyX zXcMqn4HbVQjJ|qvJI+gSa7S=t4(`mt5nWd|+R2W|L=C+ND%lg1Pa~%SzPXf^`b`7j zWCu)3B^$U1>g^I|&(w{@v~bdgI!^}ss>B-dD7A8g`0Z}BiXye`rcK$~p#FxRpRNt$ zdhOjrwc``qBC;mg@XnAb)q7g90aUIqkexN{;Ac_;?Zc9kNSW!bV5K78sa?k|)(KYj zIDy16oG9ri+c?UD$<#D4D($LG6ShObO09-78%D{3W**vQxH~nXy^=jHW*eoJA1Ny) z&Ceu71$}*4J(YyyvexF0m>0d#1I3;pfApX@J@23_>Jok=K|G8}&1xtQO!UsbQaks(8zAhLq)gK^c2Vj*G6 z9U5ivb-d{F{irb{(}>ghb7s)eW=;z;MkKTI#S9zG)|tpM_zX#W8RTU*B|weGWhS>x z#2GN|$DIKoxh{lLto~_+WSLDPQD)GMM;R*+m0DbriL**Y{j;-!g3IDmuIZm(lhYty zb(y&2Wi>RI;S*fy7^2;7LRgjuj|RFCt^EketcPGUR34c00H$MHTZ2Wq4#UK|tlUJs zye#7ooD~5xc{>hf3=<9WGI>%fjfp*$ju1RXxW;yr%#`d{*<-+3u+2@ltGI!dHxhyN zXS^ha4gt#;SVz*HfR3jaA>U(2dbuUe;`ZXGr69TPd9woaV~tQM0L(Y>G@q5Y$@I!Ipv_8vaoylU_>>VIEJ03n- zzEd1HKU`j3*58n9_U%ZnuUl=AbS&%~-?EL~JyBTNz6E;)*sj3emc!-B;>OK`V)sJ+ zA+=9GP44xjK)VJGSIh9zgVXbHkKq43j3S{Z z3}D|y-tEG_56U|Y0raKS+iQ5teZTlQ{pUX#OvsBr@myO;*S3Jo6H#(Ym&kevlx?~s zCuC5N%H-1$QT~khC;1b$@TEcWOI)^Mw*ueFH2vlmb`M?4M!lmucZ7MzR_Prd%cm00 z7WR&^WTpQw@95E`!aM#)|46RFJ4F5OZLc@do1zCYKjCP4M0FqWN7Uy)P<|6?O6r^|)hr}{^7y?1TfNP9j7)O+K9z1saSJCPiX<6FcG^%@X*u|wBb zWhZAXfnv1TD}OR zEvTi+^(2vxm(^(lw=0bMEO58FxF%E7I$kcLo?`rO^YB#msdcp8jM}{nv%h1fkcu==LaHL6#C2J3k5OF(@nhN|)+8qPdG zqqzCxn_G8$J_p<#0bCvT8NjAzqkJyhz6XH#jS!-Ktum1Mrr|p3rOavD4j}FfASUl4 z8m9R+jnwd)0X&l#Z5=Vviw_48RkGCpHs6cA;a)t5HvLu>V!OT?!F(iuY374I(}m{d zUii+V0n8-LY#LVIG>jMN$z3iU%Uo-m=A(nyc>K1Hm}bKbq`qmij&r0Uq=4m1=R0A0Ph}+l1L=-d;nGN zcIw(SKhwwFfew0z-VUk();A5eZzMu^!NW~9Wi?3C-WrR34*6*EGd}~?yst*a#)~eR z**k6nFVdDT89Zk{+Q8|XhI=mJqc0mYe~Vk=H1DIaeAX)rQE!{KjueZuUS&wg+gV%j zZ6HQ`<+~1Iy1m~zUM^!EeH`=J_fkA{WWd1an}&TPQmexTMW(LUDFltuyocm7YSn>S zy=LPDCu)t{X#H{->+N3Pb#gp)SVEg`e%{Jhh4ulb%fiv;FAP-kvsPpl>GrYInGOS| zZyIgE*huT)XwESajnll3m<=f>BPH50XCbQKsMGXT^H}+a|L1)yH~H0c-^FIgBOFg6 z<=UuOE+dYA8m-vJkUGRu>C-`N;6z6B 
z@f3%hIJ!SrCkz%p#nc8?#4ldAvGnoi=;Sg}=X z$iYH8ngezQu<4mTpD}-k_tN1k#J1O$n7@qpsBQa8ES@_Z!c&I}O|$BohQ~u>mt!;o zFFCKMp_(3pF+Ux1Fq0#YEn~&}^o)arcN=r6(f~H^rx|m!*yB9wBbt+%Z6HQw>v0!R zpZ+v(`lexzi9~o42F*QCs!^I>i(=j~>EN}!I%K@vh~46I4i;v$^jkAiJ$=(~Yex1P zikXPwD6K|nzC~km+Ib%{IZS(h@TPpcw&(0v&THDi^Fp%b$9rTv%s6ObhEYe8?@w#a z`grZPW+Xbh;NqzdMNH-NO~a!h(v}xfEaz0VMrmOrM6xGyHr~C-o@B%@;!XS#$1{h= z+o)S)Rk-Y9sn19aoW5za=VEWHS2)@|SYcyW*Ep6rH?M2cyobczj<2U!$yxk1a56km zWGCktjy&IQVDwF+y%d}I7Z^(VS%CQO#A=l0TQ473-AX|`{TV_Vn30|B#Y{~7X+s;h zk=S7=h?{&aVIcKQqrDl6jLHsT+mTVERVxk_A|ug_X2xtQR>>F_9T*ok99;IHh;B;b uyN$MKKJq?v)5qiAp0tb^%c Date: Mon, 26 Feb 2024 03:06:41 +0100 Subject: [PATCH 090/216] Fixed problems with ModLoader support. (#65) * Fixed problems with ModLoader support. * fixup! Fixed problems with ModLoader support. * Fixed directories creation in case if CLEO.asi is not placed in game's root. * Update source/CleoBase.cpp Co-authored-by: Seemann * Update source/CPluginSystem.h Co-authored-by: Seemann * fixup! Update source/CleoBase.cpp * Name buffers enlarged. * Review fixes. 
--------- Co-authored-by: Seemann --- .github/workflows/main.yml | 1 - README.md | 13 +- cleo_plugins/DebugUtils/DebugUtils.vcxproj | 10 +- .../DebugUtils/DebugUtils.vcxproj.filters | 2 +- .../{DebugUtils.ini => SA.DebugUtils.ini} | 0 .../FileSystemOperations.vcxproj | 16 +- cleo_plugins/IniFiles/IniFiles.cpp | 162 ++++++------------ cleo_plugins/IniFiles/IniFiles.vcxproj | 8 +- .../IntOperations/IntOperations.vcxproj | 8 +- .../MemoryOperations/MemoryOperations.vcxproj | 16 +- source/CCustomOpcodeSystem.cpp | 4 +- source/CPluginSystem.h | 52 ++++-- source/CleoBase.cpp | 9 +- 13 files changed, 138 insertions(+), 163 deletions(-) rename cleo_plugins/DebugUtils/{DebugUtils.ini => SA.DebugUtils.ini} (100%) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1c220f58..262e178a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -51,7 +51,6 @@ jobs: @REM copy files copy source\cleo_config.ini .output\Release\cleo\.cleo_config.ini copy cleo_plugins\.output\*.cleo .output\Release\cleo\cleo_plugins - copy cleo_plugins\.output\*.cleo5 .output\Release\cleo\cleo_plugins copy cleo_plugins\.output\*.ini .output\Release\cleo\cleo_plugins copy cleo_plugins\Audio\bass\bass.dll .output\Release\bass.dll xcopy /E /I tests\ .output\cleo diff --git a/README.md b/README.md index 52f5bb5f..cae11b92 100644 --- a/README.md +++ b/README.md @@ -8,15 +8,18 @@ CLEO requires an 'ASI Loader' installed to run which is provided with the releas No additional files are replaced, however the following files and folders are added: - cleo\ (CLEO script directory) -- cleo\cleo_plugins\DebugUtils.cleo (script debug utilities plugin) -- cleo\cleo_plugins\FileSystemOperations.cleo (file system plugin) -- cleo\cleo_plugins\IniFiles.cleo (INI config plugin) -- cleo\cleo_plugins\IntOperations.cleo (INT operations plugin) +- cleo\cleo_plugins\SA.Audio.cleo (audio playback utilities powered by BASS.dll library) +- cleo\cleo_plugins\SA.DebugUtils.cleo (script 
debugging utilities plugin) +- cleo\cleo_plugins\SA.FileSystemOperations.cleo (disk drive files related operations plugin) +- cleo\cleo_plugins\SA.IniFiles.cleo (.ini config files handling plugin) +- cleo\cleo_plugins\SA.IntOperations.cleo (additional math operations plugin) +- cleo\cleo_plugins\SA.MemoryOperations (memory and .dll libraries utilities plugin) - cleo\cleo_saves\ (CLEO save directory) - cleo\cleo_text\ (CLEO text directory) - cleo.asi (core library) - bass.dll (audio engine library) -- vorbisHooked.dll (Silent's ASI Loader) +- vorbisFile.dll (Silent's ASI Loader) +- vorbisHooked.dll (original vorbisFile.dll file) All plugins are optional, however they may be required by various CLEO scripts. diff --git a/cleo_plugins/DebugUtils/DebugUtils.vcxproj b/cleo_plugins/DebugUtils/DebugUtils.vcxproj index 838cf9e1..536eb3b5 100644 --- a/cleo_plugins/DebugUtils/DebugUtils.vcxproj +++ b/cleo_plugins/DebugUtils/DebugUtils.vcxproj @@ -44,14 +44,14 @@ $(SolutionDir).output\ $(ProjectDir).obj\$(Configuration)\ - DebugUtils - .cleo5 + SA.DebugUtils + .cleo $(SolutionDir).output\ $(ProjectDir).obj\$(Configuration)\ - DebugUtils - .cleo5 + SA.DebugUtils + .cleo $(GTA_SA_DIR)\gta_sa.exe @@ -132,7 +132,7 @@ xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" - + diff --git a/cleo_plugins/DebugUtils/DebugUtils.vcxproj.filters b/cleo_plugins/DebugUtils/DebugUtils.vcxproj.filters index 533d3aa0..b06f5016 100644 --- a/cleo_plugins/DebugUtils/DebugUtils.vcxproj.filters +++ b/cleo_plugins/DebugUtils/DebugUtils.vcxproj.filters @@ -34,6 +34,6 @@ - + \ No newline at end of file diff --git a/cleo_plugins/DebugUtils/DebugUtils.ini b/cleo_plugins/DebugUtils/SA.DebugUtils.ini similarity index 100% rename from cleo_plugins/DebugUtils/DebugUtils.ini rename to cleo_plugins/DebugUtils/SA.DebugUtils.ini diff --git a/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj b/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj index 5129493b..742cef3a 
100644 --- a/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj @@ -44,14 +44,14 @@ $(SolutionDir).output\ $(ProjectDir).obj\$(Configuration)\ - FileSystemOperations - .cleo5 + SA.FileSystemOperations + .cleo $(SolutionDir).output\ $(ProjectDir).obj\$(Configuration)\ - FileSystemOperations - .cleo5 + SA.FileSystemOperations + .cleo $(GTA_SA_DIR)\gta_sa.exe @@ -83,7 +83,9 @@ taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" -xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" +if defined GTA_SA_DIR ( + xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" +) @@ -106,7 +108,9 @@ xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" -xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" +if defined GTA_SA_DIR ( + xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" +) diff --git a/cleo_plugins/IniFiles/IniFiles.cpp b/cleo_plugins/IniFiles/IniFiles.cpp index 2daff3dd..0064d1e6 100644 --- a/cleo_plugins/IniFiles/IniFiles.cpp +++ b/cleo_plugins/IniFiles/IniFiles.cpp @@ -1,5 +1,6 @@ #include #include "CLEO.h" +#include "CLEO_Utils.h" #include using namespace CLEO; @@ -28,27 +29,24 @@ class IniFiles } } + // resused globals to cut down allocations + static char section[128]; + static char key[128]; + static OpcodeResult WINAPI Script_InifileGetInt(CScriptThread* thread) /**************************************************************** Opcode Format 0AF0=4,%4d% = get_int_from_ini_file %1s% section %2s% key %3s% ****************************************************************/ { - char path[MAX_PATH]; - char sectionName[100]; - char key[100]; - int result; - - CLEO_ReadStringPointerOpcodeParam(thread, path, sizeof(path)); - CLEO_ReadStringPointerOpcodeParam(thread, sectionName, sizeof(sectionName)); - CLEO_ReadStringPointerOpcodeParam(thread, key, 
sizeof(key)); - - CLEO_ResolvePath(thread, path, sizeof(path)); // convert to absolute path + auto path = OPCODE_READ_PARAM_FILEPATH(); + OPCODE_READ_PARAM_STRING_BUFF(section, sizeof(section)); + OPCODE_READ_PARAM_STRING_BUFF(key, sizeof(key)); - result = GetPrivateProfileInt(sectionName, key, 0x80000000, path); - CLEO_SetIntOpcodeParam(thread, result); - CLEO_SetThreadCondResult(thread, result != 0x80000000); + auto result = GetPrivateProfileInt(section, key, 0x80000000, path); + OPCODE_WRITE_PARAM_INT(result); + OPCODE_CONDITION_RESULT(result != 0x80000000); return OR_CONTINUE; } @@ -58,23 +56,16 @@ class IniFiles 0AF1=4,write_int %1d% to_ini_file %2s% section %3s% key %4s% ****************************************************************/ { - char path[MAX_PATH]; - char sectionName[100]; - char key[100]; - DWORD value; - char strValue[100]; - BOOL result; + auto value = OPCODE_READ_PARAM_INT(); + auto path = OPCODE_READ_PARAM_FILEPATH(); + OPCODE_READ_PARAM_STRING_BUFF(section, sizeof(section)); + OPCODE_READ_PARAM_STRING_BUFF(key, sizeof(key)); - value = CLEO_GetIntOpcodeParam(thread); - CLEO_ReadStringPointerOpcodeParam(thread, path, sizeof(path)); - CLEO_ReadStringPointerOpcodeParam(thread, sectionName, sizeof(sectionName)); - CLEO_ReadStringPointerOpcodeParam(thread, key, sizeof(key)); - - CLEO_ResolvePath(thread, path, sizeof(path)); // convert to absolute path - - result = WritePrivateProfileString(sectionName, key, _itoa(value, strValue, 10), path); - CLEO_SetThreadCondResult(thread, result); + char strValue[32]; + _itoa(value, strValue, 10); + auto result = WritePrivateProfileString(section, key, strValue, path); + OPCODE_CONDITION_RESULT(result); return OR_CONTINUE; } @@ -84,30 +75,23 @@ class IniFiles 0AF2=4,%4d% = get_float_from_ini_file %1s% section %2s% key %3s% ****************************************************************/ { - char path[MAX_PATH]; - char sectionName[100]; - char key[100]; - float value = 0.0f; - char strValue[100]; - BOOL 
result; - - CLEO_ReadStringPointerOpcodeParam(thread, path, sizeof(path)); - CLEO_ReadStringPointerOpcodeParam(thread, sectionName, sizeof(sectionName)); - CLEO_ReadStringPointerOpcodeParam(thread, key, sizeof(key)); + auto path = OPCODE_READ_PARAM_FILEPATH(); + OPCODE_READ_PARAM_STRING_BUFF(section, sizeof(section)); + OPCODE_READ_PARAM_STRING_BUFF(key, sizeof(key)); - CLEO_ResolvePath(thread, path, sizeof(path)); // convert to absolute path - - result = GetPrivateProfileString(sectionName, key, NULL, strValue, sizeof(strValue), path); + auto value = 0.0f; + char strValue[32]; + auto result = GetPrivateProfileString(section, key, NULL, strValue, sizeof(strValue), path); if (result) { value = (float)atof(strValue); - CLEO_SetFloatOpcodeParam(thread, value); + OPCODE_WRITE_PARAM_FLOAT(value); } else - CLEO_SkipOpcodeParams(thread, 1); - - CLEO_SetThreadCondResult(thread, result); - + { + OPCODE_SKIP_PARAMS(1); + } + OPCODE_CONDITION_RESULT(result); return OR_CONTINUE; } @@ -117,25 +101,16 @@ class IniFiles 0AF3=4,write_float %1d% to_ini_file %2s% section %3s% key %4s% ****************************************************************/ { - char path[MAX_PATH]; - char sectionName[100]; - char key[100]; - float value; - char strValue[100]; - BOOL result; - - value = CLEO_GetFloatOpcodeParam(thread); - CLEO_ReadStringPointerOpcodeParam(thread, path, sizeof(path)); - CLEO_ReadStringPointerOpcodeParam(thread, sectionName, sizeof(sectionName)); - CLEO_ReadStringPointerOpcodeParam(thread, key, sizeof(key)); - - CLEO_ResolvePath(thread, path, sizeof(path)); // convert to absolute path + auto value = OPCODE_READ_PARAM_FLOAT(); + auto path = OPCODE_READ_PARAM_FILEPATH(); + OPCODE_READ_PARAM_STRING_BUFF(section, sizeof(section)); + OPCODE_READ_PARAM_STRING_BUFF(key, sizeof(key)); + char strValue[32]; sprintf(strValue, "%g", value); + auto result = WritePrivateProfileString(section, key, strValue, path); - result = WritePrivateProfileString(sectionName, key, strValue, path); - 
CLEO_SetThreadCondResult(thread, result); - + OPCODE_CONDITION_RESULT(result); return OR_CONTINUE; } @@ -145,40 +120,21 @@ class IniFiles 0AF4=4,%4d% = read_string_from_ini_file %1s% section %2s% key %3s% ****************************************************************/ { - char path[MAX_PATH]; - char sectionName[100]; - char key[100]; - char strValue[100]; - char *strptr; - BOOL result; - - CLEO_ReadStringPointerOpcodeParam(thread, path, sizeof(path)); - CLEO_ReadStringPointerOpcodeParam(thread, sectionName, sizeof(sectionName)); - CLEO_ReadStringPointerOpcodeParam(thread, key, sizeof(key)); + auto path = OPCODE_READ_PARAM_FILEPATH(); + OPCODE_READ_PARAM_STRING_BUFF(section, sizeof(section)); + OPCODE_READ_PARAM_STRING_BUFF(key, sizeof(key)); - CLEO_ResolvePath(thread, path, sizeof(path)); // convert to absolute path - - result = GetPrivateProfileString(sectionName, key, NULL, strValue, sizeof(strValue), path); + char strValue[MAX_STR_LEN]; + auto result = GetPrivateProfileString(section, key, NULL, strValue, sizeof(strValue), path); if (result) { - switch (CLEO_GetOperandType(thread)) - { - case DT_VAR_STRING: - case DT_LVAR_STRING: - case DT_VAR_TEXTLABEL: - case DT_LVAR_TEXTLABEL: - CLEO_WriteStringOpcodeParam(thread, strValue); - break; - default: - strptr = (char *)CLEO_GetIntOpcodeParam(thread); - strcpy(strptr, strValue); - } + OPCODE_WRITE_PARAM_STRING(strValue); } else - CLEO_SkipOpcodeParams(thread, 1); - - CLEO_SetThreadCondResult(thread, result); - + { + OPCODE_SKIP_PARAMS(1); + } + OPCODE_CONDITION_RESULT(result); return OR_CONTINUE; } @@ -188,23 +144,17 @@ class IniFiles 0AF5=4,write_string %1s% to_ini_file %2s% section %3s% key %4s% ****************************************************************/ { - char path[MAX_PATH]; - char sectionName[100]; - char key[100]; - char strValue[100]; - BOOL result; - - CLEO_ReadStringPointerOpcodeParam(thread, strValue, sizeof(strValue)); - CLEO_ReadStringPointerOpcodeParam(thread, path, sizeof(path)); - 
CLEO_ReadStringPointerOpcodeParam(thread, sectionName, sizeof(sectionName)); - CLEO_ReadStringPointerOpcodeParam(thread, key, sizeof(key)); + char strValue[MAX_STR_LEN]; OPCODE_READ_PARAM_STRING_BUFF(strValue, sizeof(strValue)); + auto path = OPCODE_READ_PARAM_FILEPATH(); + OPCODE_READ_PARAM_STRING_BUFF(section, sizeof(section)); + OPCODE_READ_PARAM_STRING_BUFF(key, sizeof(key)); - CLEO_ResolvePath(thread, path, sizeof(path)); // convert to absolute path - - result = WritePrivateProfileString(sectionName, key, strValue, path); - - CLEO_SetThreadCondResult(thread, result); + auto result = WritePrivateProfileString(section, key, strValue, path); + OPCODE_CONDITION_RESULT(result); return OR_CONTINUE; } } iniFiles; + +char IniFiles::section[128]; +char IniFiles::key[128]; diff --git a/cleo_plugins/IniFiles/IniFiles.vcxproj b/cleo_plugins/IniFiles/IniFiles.vcxproj index 32eb6071..295783ac 100644 --- a/cleo_plugins/IniFiles/IniFiles.vcxproj +++ b/cleo_plugins/IniFiles/IniFiles.vcxproj @@ -44,14 +44,14 @@ $(SolutionDir).output\ $(ProjectDir).obj\$(Configuration)\ - IniFiles - .cleo5 + SA.IniFiles + .cleo $(SolutionDir).output\ $(ProjectDir).obj\$(Configuration)\ - IniFiles - .cleo5 + SA.IniFiles + .cleo $(GTA_SA_DIR)\gta_sa.exe diff --git a/cleo_plugins/IntOperations/IntOperations.vcxproj b/cleo_plugins/IntOperations/IntOperations.vcxproj index 3d0b82a1..83dd2abd 100644 --- a/cleo_plugins/IntOperations/IntOperations.vcxproj +++ b/cleo_plugins/IntOperations/IntOperations.vcxproj @@ -44,14 +44,14 @@ $(SolutionDir).output\ $(ProjectDir).obj\$(Configuration)\ - IntOperations - .cleo5 + SA.IntOperations + .cleo $(SolutionDir).output\ $(ProjectDir).obj\$(Configuration)\ - IntOperations - .cleo5 + SA.IntOperations + .cleo $(GTA_SA_DIR)\gta_sa.exe diff --git a/cleo_plugins/MemoryOperations/MemoryOperations.vcxproj b/cleo_plugins/MemoryOperations/MemoryOperations.vcxproj index 3a5b08dc..bb8086a3 100644 --- a/cleo_plugins/MemoryOperations/MemoryOperations.vcxproj +++ 
b/cleo_plugins/MemoryOperations/MemoryOperations.vcxproj @@ -45,14 +45,14 @@ $(SolutionDir).output\ $(ProjectDir).obj\$(Configuration)\ - MemoryOperations - .cleo5 + SA.MemoryOperations + .cleo $(SolutionDir).output\ $(ProjectDir).obj\$(Configuration)\ - MemoryOperations - .cleo5 + SA.MemoryOperations + .cleo $(GTA_SA_DIR)\gta_sa.exe @@ -84,7 +84,9 @@ taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" -xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" +if defined GTA_SA_DIR ( + xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" +) @@ -107,7 +109,9 @@ xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" -xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" +if defined GTA_SA_DIR ( + xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" +) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 04b3fb53..10430374 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -209,7 +209,7 @@ namespace CLEO CCustomOpcodeSystem::CCustomOpcodeSystem() { - // register CLEO opcodes + TRACE("Initializing CLEO core opcodes..."); CLEO_RegisterOpcode(0x0A92, opcode_0A92); CLEO_RegisterOpcode(0x0A93, opcode_0A93); CLEO_RegisterOpcode(0x0A94, opcode_0A94); @@ -2018,7 +2018,7 @@ extern "C" return texture; } - CLEO::HSTREAM WINAPI CLEO_GetInternalAudioStream(CLEO::CRunningScript* thread, DWORD stream) // arg CAudioStream * + DWORD WINAPI CLEO_GetInternalAudioStream(CLEO::CRunningScript* thread, DWORD stream) // arg CAudioStream * { return stream; // CAudioStream::streamInternal offset is 0 } diff --git a/source/CPluginSystem.h b/source/CPluginSystem.h index e807953f..58341806 100644 --- a/source/CPluginSystem.h +++ b/source/CPluginSystem.h @@ -17,39 +17,55 @@ namespace CLEO CPluginSystem() { std::set loaded; - auto LoadPluginsDir = [&](std::string path, std::string extension) - { + auto 
LoadPluginsDir = [&](std::string path, std::string prefix, std::string extension) + { + std::set> filesWithPrefix; + std::set> filesWithoutPrefix; + FilesWalk(path.c_str(), extension.c_str(), [&](const char* fullPath, const char* filename) { std::string name = filename; name.resize(name.length() - extension.length()); // cut off file type std::transform(name.begin(), name.end(), name.begin(), [](unsigned char c) { return std::tolower(c); }); - if (loaded.find(name) == loaded.end()) + if (_strnicmp(name.c_str(), prefix.c_str(), prefix.length()) == 0) { - TRACE("Loading plugin '%s'", fullPath); - HMODULE hlib = LoadLibrary(fullPath); - if (!hlib) - { - LOG_WARNING(0, "Error loading plugin '%s'", fullPath); - } - else - { - loaded.insert(name); - plugins.push_back(hlib); - } + filesWithPrefix.insert({ fullPath, name.c_str() + prefix.length() }); } else { - LOG_WARNING(0, "Plugin `%s` already loaded. Skipping '%s'", name.c_str(), fullPath); + filesWithoutPrefix.insert({ fullPath, name }); } }); + + auto loadLib = [&](const char* fullPath, const char* name) + { + if (loaded.find(name) != loaded.end()) + { + LOG_WARNING(0, "Plugin `%s` already loaded. 
Skipping '%s'", fullPath, name); + return; + } + + TRACE("Loading plugin '%s'", fullPath); + HMODULE hlib = LoadLibrary(fullPath); + if (!hlib) + { + LOG_WARNING(0, "Error loading plugin '%s'", fullPath); + return; + } + + loaded.insert(name); + plugins.push_back(hlib); + }; + + // load with prefix first + for (const auto& entry : filesWithPrefix) loadLib(entry.first.c_str(), entry.second.c_str()); + for (const auto& entry : filesWithoutPrefix) loadLib(entry.first.c_str(), entry.second.c_str()); }; TRACE("Loading plugins..."); - LoadPluginsDir(FS::path(Filepath_Cleo).append("cleo_plugins").string(), ".cleo5"); // CLEO5 plugins - LoadPluginsDir(FS::path(Filepath_Cleo).append("cleo_plugins").string(), ".cleo"); // legacy plugins - LoadPluginsDir(Filepath_Cleo.c_str(), ".cleo"); // legacy plugins location + LoadPluginsDir(FS::path(Filepath_Cleo).append("cleo_plugins").string(), "SA.", ".cleo"); // prioritize with prefix + LoadPluginsDir(Filepath_Cleo.c_str(), "SA.", ".cleo"); // legacy plugins location } ~CPluginSystem() diff --git a/source/CleoBase.cpp b/source/CleoBase.cpp index c872d51d..9a010cf0 100644 --- a/source/CleoBase.cpp +++ b/source/CleoBase.cpp @@ -116,11 +116,10 @@ namespace CLEO if (m_bStarted) return; // already started m_bStarted = true; - FS::create_directory(Filepath_Cleo); - FS::create_directory(FS::path(Filepath_Cleo).append("cleo_modules")); - FS::create_directory(FS::path(Filepath_Cleo).append("cleo_plugins")); - FS::create_directory(FS::path(Filepath_Cleo).append("cleo_saves")); - FS::create_directory(FS::path(Filepath_Cleo).append("cleo_text")); + FS::create_directory(FS::path(Filepath_Root).append("cleo")); + FS::create_directory(FS::path(Filepath_Root).append("cleo\\cleo_modules")); + FS::create_directory(FS::path(Filepath_Root).append("cleo\\cleo_plugins")); + FS::create_directory(FS::path(Filepath_Root).append("cleo\\cleo_saves")); CodeInjector.OpenReadWriteAccess(); // must do this earlier to ensure plugins write access on init 
GameMenu.Inject(CodeInjector); From a7d434142add58fec2100ea36e9cbfc60ac5c131 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Mon, 26 Feb 2024 03:09:02 +0100 Subject: [PATCH 091/216] Remove legacy leftovers (#66) * Removed legacy leftover class. * fixup! Removed legacy leftover class. --- CLEO5.vcxproj | 2 -- CLEO5.vcxproj.filters | 6 ------ source/CCustomOpcodeSystem.cpp | 1 - source/CLegacy.cpp | 11 ----------- source/CLegacy.h | 16 ---------------- source/CleoBase.h | 1 - 6 files changed, 37 deletions(-) delete mode 100644 source/CLegacy.cpp delete mode 100644 source/CLegacy.h diff --git a/CLEO5.vcxproj b/CLEO5.vcxproj index 0c7a0970..9a3fbe4b 100644 --- a/CLEO5.vcxproj +++ b/CLEO5.vcxproj @@ -44,7 +44,6 @@ - @@ -70,7 +69,6 @@ - diff --git a/CLEO5.vcxproj.filters b/CLEO5.vcxproj.filters index 8b8127ad..841af70d 100644 --- a/CLEO5.vcxproj.filters +++ b/CLEO5.vcxproj.filters @@ -81,9 +81,6 @@ source\extensions - - source\extensions - source\extensions @@ -137,9 +134,6 @@ source\extensions - - source\extensions - source\extensions diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 10430374..3954ea45 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -1,6 +1,5 @@ #include "stdafx.h" #include "CleoBase.h" -#include "CLegacy.h" #include "CGameVersionManager.h" #include "CCustomOpcodeSystem.h" #include "ScmFunction.h" diff --git a/source/CLegacy.cpp b/source/CLegacy.cpp deleted file mode 100644 index 8731c483..00000000 --- a/source/CLegacy.cpp +++ /dev/null @@ -1,11 +0,0 @@ -#include "stdafx.h" -#include "CLegacy.h" - -namespace CLEO -{ - // CLegacy Legacy; - - CLegacy::CLegacy() - { - } -}; diff --git a/source/CLegacy.h b/source/CLegacy.h deleted file mode 100644 index 31db9ac8..00000000 --- a/source/CLegacy.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once -#include "CCodeInjector.h" -#include "CGameVersionManager.h" - -namespace CLEO -{ - class CLegacy - { - DWORD FUNC_fopen; - DWORD FUNC_fclose; - - 
CLegacy(); - }; - - extern CLegacy Legacy; -}; diff --git a/source/CleoBase.h b/source/CleoBase.h index 7ffecc72..67dc418a 100644 --- a/source/CleoBase.h +++ b/source/CleoBase.h @@ -31,7 +31,6 @@ namespace CLEO CCustomOpcodeSystem OpcodeSystem; CModuleSystem ModuleSystem; CPluginSystem PluginSystem; - //CLegacy Legacy; int saveSlot = -1; // -1 if not loaded from save From a1044ef418e8a6bf4ad9904eefefc397b015c125 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Mon, 26 Feb 2024 03:20:43 +0100 Subject: [PATCH 092/216] fix missing bass.h (#67) --- cleo_plugins/Audio/Audio.vcxproj | 2 +- cleo_plugins/Audio/CAudioStream.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cleo_plugins/Audio/Audio.vcxproj b/cleo_plugins/Audio/Audio.vcxproj index 4ae049fa..a3d5429b 100644 --- a/cleo_plugins/Audio/Audio.vcxproj +++ b/cleo_plugins/Audio/Audio.vcxproj @@ -70,7 +70,7 @@ MultiThreaded _NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions);TARGET_NAME=R"($(TargetName))" /Zc:threadSafeInit- %(AdditionalOptions) - $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared;$(PLUGIN_SDK_DIR)\shared\game;$(SolutionDir)..\cleo_sdk;$(SolutionDir)..\third-party\bass;%(AdditionalIncludeDirectories) + $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared;$(PLUGIN_SDK_DIR)\shared\game;$(SolutionDir)..\cleo_sdk;$(ProjectDir)\bass\;%(AdditionalIncludeDirectories) stdcpp17 diff --git a/cleo_plugins/Audio/CAudioStream.cpp b/cleo_plugins/Audio/CAudioStream.cpp index f6c7d5af..74927334 100644 --- a/cleo_plugins/Audio/CAudioStream.cpp +++ b/cleo_plugins/Audio/CAudioStream.cpp @@ -60,7 +60,7 @@ void CAudioStream::SetProgress(float value) { value = std::clamp(value, 0.0f, 1.0f); auto total = BASS_ChannelGetLength(streamInternal, 
BASS_POS_BYTE); - auto bytePos = total * value; + auto bytePos = QWORD(value * total); BASS_ChannelSetPosition(streamInternal, bytePos, BASS_POS_BYTE); } From 228d4e5066e558f53fa747c0b02b6bdebe0b7493 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Mon, 26 Feb 2024 03:31:05 +0100 Subject: [PATCH 093/216] fix missing bass.lib (#68) --- cleo_plugins/Audio/Audio.vcxproj | 6 ++++-- cleo_plugins/Audio/CSoundSystem.h | 2 -- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cleo_plugins/Audio/Audio.vcxproj b/cleo_plugins/Audio/Audio.vcxproj index a3d5429b..ca1788db 100644 --- a/cleo_plugins/Audio/Audio.vcxproj +++ b/cleo_plugins/Audio/Audio.vcxproj @@ -47,12 +47,14 @@ $(ProjectDir).obj\$(Configuration)\ Audio .cleo5 + $(ProjectDir)bass;$(LibraryPath) $(SolutionDir).output\ $(ProjectDir).obj\$(Configuration)\ Audio .cleo5 + $(ProjectDir)bass;$(LibraryPath) $(GTA_SA_DIR)\gta_sa.exe @@ -79,7 +81,7 @@ true UseLinkTimeCodeGeneration $(PLUGIN_SDK_DIR)\output\lib\;$(SolutionDir)..\cleo_sdk\;$(ProjectDir)bass\;%(AdditionalLibraryDirectories) - cleo.lib;%(AdditionalDependencies) + cleo.lib;bass.lib;%(AdditionalDependencies) Windows @@ -102,7 +104,7 @@ xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" true Default $(PLUGIN_SDK_DIR)\output\lib\;$(SolutionDir)..\cleo_sdk\;$(ProjectDir)bass\;%(AdditionalLibraryDirectories) - cleo.lib;%(AdditionalDependencies) + cleo.lib;bass.lib;%(AdditionalDependencies) Windows diff --git a/cleo_plugins/Audio/CSoundSystem.h b/cleo_plugins/Audio/CSoundSystem.h index abcc7cb1..86e57706 100644 --- a/cleo_plugins/Audio/CSoundSystem.h +++ b/cleo_plugins/Audio/CSoundSystem.h @@ -2,8 +2,6 @@ #include "bass.h" #include -#pragma comment(lib, "bass.lib") - namespace CLEO { class CAudioStream; From 7898104c9292625a7b9a259242fe85999929e7ab Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Mon, 26 Feb 2024 03:45:27 +0100 Subject: [PATCH 094/216] Add missing bass.lib file to repository (#69) --- cleo_plugins/Audio/bass/bass.lib | Bin 0 -> 
25944 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 cleo_plugins/Audio/bass/bass.lib diff --git a/cleo_plugins/Audio/bass/bass.lib b/cleo_plugins/Audio/bass/bass.lib new file mode 100644 index 0000000000000000000000000000000000000000..c69024ab53aa77394984519beea8c6bd4f084700 GIT binary patch literal 25944 zcmdsAON?B_)jqbtI1T~FIAAcC!5H&O%sjfs;|F%n(=+zWxIGVh{G92TZrh!VXGT3e zC}cqpVije<0!1tmEBY5vRw!b@B8vz`S^bI-LQz%`3oDAEtWcB{LP@@=x>fhocVp;O0}h1CwIQqVgGgY_U60txvsuWyIlY5?dk1NoQ0i4 z^k<^(A)?$OQT}H{<*$jZzols6*F=J@|3lH_9wI@-V~WN;B@%RQLDBi$M1szKrD%SR zNYLcJ6rKAFa-fCIgg(F(H1#`0=YIr!Q2BjD69wRd=Kr9m_$%OouH8{I4qQRkA5k<8 z+k%dKp=f4}NYFIwNjiipXoeJB`74p2O9Kggge&Okl%gvSfG?=7=<4rL4!XQQq4#hF zUF%k~@Nd)yR9aEAc$r90>8_&1KjJrNsV|}5;0h{tD4GJUp!0uIH2Yg3K^NauH1iic zH|XM*ie^W_2VLk?ba5BTL34|WF8mVZpt(OO>VvGH!5xZ@!;YZg&lH_PI|@1heL*Mh zDC+wJdZ1JPRCKhHNKoNZMWgQ!NrJ6WP+>vQ*md{^XsD*>*n32RM&WmYPQ9&Y=4L1(}dboM8T&VVOq67?5!WKmJ?U83H*^Ydd9 z#dA|L^FzbagHvP0nR8R~i>38-nxCJX9Uu3(^kczR%$d>I$+PoA{WJX#uijpppP#4s zf&S^~`RPi1aCPNI^=6?@v;1;da@t7dEJ>@})-Bd+S!NTKZCMm%7*AMQ{z9d;UR_-& zbh?ki7*ck)R$HyfM=VzyX_uvK4^`f%E>*@>Zmc>F4>LATYK*N^>xFLnK}?yH7^ziU zY+WSz=mTZfsk~KgKb=z*^JJo2OUfE}&8bTja=lcmJGQ2ESt7Bz<_V}G$uE{R*4@gP zGAS{(E-hBdjv1?5O3tp8OZAG=2x*~|5nWlFbxfs&QU>jIVRd=qwrk2N*VYnAyZ>M? 
zE!Q@ChDJ(D_0`&Xp{v_+JJ_{(+O}P)240w#ViZ@`t99YpUA;bt#Y`<(#V(A+lsf%u zYs+u;3^|R+N~KhBdE;hvWqh?%c7}n?V6xJwZHH5`n!#j;S4xY^m7wL6#pKZ=+H+Y2 zm3hXK8q=lQYs(e+*&7Qte<$NxI9l|qzX@TJ%wo*zNa}mHSW0#Q`BrvPgtB?uhhIj&Xg?~W?bJhXZLPrkX>>Ht)mYo8PnNuel&LWRrYmO;u-ui>)NE`;bD0)urpj$Mwb8`< z!E{u#;!N8Kj0|K1OUfq`*T9?g%6j)S++wBdt>l?9+(M$;EiJhf4yF-E^Z+B33q`f!u8=Jo9TNZTz`7GTCOk) zcFe1r#uwr?@FD?$>Wcdcn`m2XuE#4j{6lvs*4V6qi4eEJE0<{_AjECpIeLan8?3vL zNcte26t@BpkMDbYNQzsL&tk#_=M{uMi6z4=NCbN^qVV7 zh3?>UaxT}Js@z_Eqaqc94P*1UCM=FuS2Ct*#^gH6TDF?a=bA7$G+oRd$C}A?lr_PL zR4rk1Jz$N@Wlxw1k83C-Fd{&TE44=E08uQ}5Pa2<>vFUL+(b)wX||TfWmvW9YOPv# z*FJ^Bc!|bTWxY~&{C#B3lu9Y#hSRGXE9G;G->)pW(mwZ;)Iz<7`{!6C3YSigj!sNW z6fP$Tkz+wuqKibIjv?cJj%fFJqOZ;(%RW!^FYrD?Stqi}AAo*0Mf4-^--nF?^#1^v zUqScIHRz5LJpy~5;rhi9qO}>wO%ojgiEWC%UO|TX646JXsjEZ};CCJLd))88O!OY8 z8+QH;oRt!GBanf4Cq1>9dpd(@DH zfA1L4PL#b3xxeH0cG!6v_U=IED_r-2|2ga(I8F2lY!1Cn^fBly$bEpa_Xkkt5j;Dt z3&20ok9NTC4{_a#c0O^2=qI>-i0eCO??trT-CntwMYn00o}mJrrZKueb5uk_UZ(*X zrb~2{uF(}5p&^>0QJSDLRHhrWNDI_YvviTp(KId5AXTVDa9%o3XK9=!X(!#H@6%1H z(zoa-dYT@m?X-h_K#$TlX%~Hi_R_<&hw_x8$EbszrRQir_0civrQ>vxPS6oLO0Ux? 
zdY%r_%k&Bzq8I4^y+AL~tMnx8qX+3BdW4>!Z_{r2E`5()qr=omUDQoI^c~tlTmK){ z>H*qDjSc(Fr;m9#5xEJ`WO4OhAf-wf5<}HYJmlDk1T?0uSRoy)&}$=qspaCC@qXytF!&l3deO521rEp8 zb-!H4F+NAfQ=Aje8en0JI*jC-Msytp+-!&8%z(py9SWXAvC3aLJqAV~0zzr1IZzyX zXcMqn4HbVQjJ|qvJI+gSa7S=t4(`mt5nWd|+R2W|L=C+ND%lg1Pa~%SzPXf^`b`7j zWCu)3B^$U1>g^I|&(w{@v~bdgI!^}ss>B-dD7A8g`0Z}BiXye`rcK$~p#FxRpRNt$ zdhOjrwc``qBC;mg@XnAb)q7g90aUIqkexN{;Ac_;?Zc9kNSW!bV5K78sa?k|)(KYj zIDy16oG9ri+c?UD$<#D4D($LG6ShObO09-78%D{3W**vQxH~nXy^=jHW*eoJA1Ny) z&Ceu71$}*4J(YyyvexF0m>0d#1I3;pfApX@J@23_>Jok=K|G8}&1xtQO!UsbQaks(8zAhLq)gK^c2Vj*G6 z9U5ivb-d{F{irb{(}>ghb7s)eW=;z;MkKTI#S9zG)|tpM_zX#W8RTU*B|weGWhS>x z#2GN|$DIKoxh{lLto~_+WSLDPQD)GMM;R*+m0DbriL**Y{j;-!g3IDmuIZm(lhYty zb(y&2Wi>RI;S*fy7^2;7LRgjuj|RFCt^EketcPGUR34c00H$MHTZ2Wq4#UK|tlUJs zye#7ooD~5xc{>hf3=<9WGI>%fjfp*$ju1RXxW;yr%#`d{*<-+3u+2@ltGI!dHxhyN zXS^ha4gt#;SVz*HfR3jaA>U(2dbuUe;`ZXGr69TPd9woaV~tQM0L(Y>G@q5Y$@I!Ipv_8vaoylU_>>VIEJ03n- zzEd1HKU`j3*58n9_U%ZnuUl=AbS&%~-?EL~JyBTNz6E;)*sj3emc!-B;>OK`V)sJ+ zA+=9GP44xjK)VJGSIh9zgVXbHkKq43j3S{Z z3}D|y-tEG_56U|Y0raKS+iQ5teZTlQ{pUX#OvsBr@myO;*S3Jo6H#(Ym&kevlx?~s zCuC5N%H-1$QT~khC;1b$@TEcWOI)^Mw*ueFH2vlmb`M?4M!lmucZ7MzR_Prd%cm00 z7WR&^WTpQw@95E`!aM#)|46RFJ4F5OZLc@do1zCYKjCP4M0FqWN7Uy)P<|6?O6r^|)hr}{^7y?1TfNP9j7)O+K9z1saSJCPiX<6FcG^%@X*u|wBb zWhZAXfnv1TD}OR zEvTi+^(2vxm(^(lw=0bMEO58FxF%E7I$kcLo?`rO^YB#msdcp8jM}{nv%h1fkcu==LaHL6#C2J3k5OF(@nhN|)+8qPdG zqqzCxn_G8$J_p<#0bCvT8NjAzqkJyhz6XH#jS!-Ktum1Mrr|p3rOavD4j}FfASUl4 z8m9R+jnwd)0X&l#Z5=Vviw_48RkGCpHs6cA;a)t5HvLu>V!OT?!F(iuY374I(}m{d zUii+V0n8-LY#LVIG>jMN$z3iU%Uo-m=A(nyc>K1Hm}bKbq`qmij&r0Uq=4m1=R0A0Ph}+l1L=-d;nGN zcIw(SKhwwFfew0z-VUk();A5eZzMu^!NW~9Wi?3C-WrR34*6*EGd}~?yst*a#)~eR z**k6nFVdDT89Zk{+Q8|XhI=mJqc0mYe~Vk=H1DIaeAX)rQE!{KjueZuUS&wg+gV%j zZ6HQ`<+~1Iy1m~zUM^!EeH`=J_fkA{WWd1an}&TPQmexTMW(LUDFltuyocm7YSn>S zy=LPDCu)t{X#H{->+N3Pb#gp)SVEg`e%{Jhh4ulb%fiv;FAP-kvsPpl>GrYInGOS| zZyIgE*huT)XwESajnll3m<=f>BPH50XCbQKsMGXT^H}+a|L1)yH~H0c-^FIgBOFg6 z<=UuOE+dYA8m-vJkUGRu>C-`N;6z6B 
z@f3%hIJ!SrCkz%p#nc8?#4ldAvGnoi=;Sg}=X z$iYH8ngezQu<4mTpD}-k_tN1k#J1O$n7@qpsBQa8ES@_Z!c&I}O|$BohQ~u>mt!;o zFFCKMp_(3pF+Ux1Fq0#YEn~&}^o)arcN=r6(f~H^rx|m!*yB9wBbt+%Z6HQw>v0!R zpZ+v(`lexzi9~o42F*QCs!^I>i(=j~>EN}!I%K@vh~46I4i;v$^jkAiJ$=(~Yex1P zikXPwD6K|nzC~km+Ib%{IZS(h@TPpcw&(0v&THDi^Fp%b$9rTv%s6ObhEYe8?@w#a z`grZPW+Xbh;NqzdMNH-NO~a!h(v}xfEaz0VMrmOrM6xGyHr~C-o@B%@;!XS#$1{h= z+o)S)Rk-Y9sn19aoW5za=VEWHS2)@|SYcyW*Ep6rH?M2cyobczj<2U!$yxk1a56km zWGCktjy&IQVDwF+y%d}I7Z^(VS%CQO#A=l0TQ473-AX|`{TV_Vn30|B#Y{~7X+s;h zk=S7=h?{&aVIcKQqrDl6jLHsT+mTVERVxk_A|ug_X2xtQR>>F_9T*ok99;IHh;B;b uyN$MKKJq?v)5qiAp0tb^%c Date: Mon, 26 Feb 2024 14:58:28 +0100 Subject: [PATCH 095/216] Added unit test scripts to the release pack (#70) --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 262e178a..1370944b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -53,7 +53,7 @@ jobs: copy cleo_plugins\.output\*.cleo .output\Release\cleo\cleo_plugins copy cleo_plugins\.output\*.ini .output\Release\cleo\cleo_plugins copy cleo_plugins\Audio\bass\bass.dll .output\Release\bass.dll - xcopy /E /I tests\ .output\cleo + xcopy /E /I tests .output\Release\cleo @REM install Silent's ASI Loader curl https://silent.rockstarvision.com/uploads/silents_asi_loader_13.zip -o silents_asi_loader_13.zip From a8d1f6257c23b9fe0e4bf36c04544fca42fce933 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Mon, 26 Feb 2024 15:22:50 +0100 Subject: [PATCH 096/216] Added get/set_audio_stream_progress opcodes to the Changelog (#71) --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 54380bf4..371a67a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ - new opcode **2504 ([set_audio_stream_volume_with_transition](https://library.sannybuilder.com/#/sa/audio/2504))** - new opcode **2505 
([set_audio_stream_speed_with_transition](https://library.sannybuilder.com/#/sa/audio/2505))** - new opcode **2506 ([set_audio_stream_source_size](https://library.sannybuilder.com/#/sa/audio/2506))** + - new opcode **2507 ([get_audio_stream_progress](https://library.sannybuilder.com/#/sa/audio/2507))** + - new opcode **2508 ([set_audio_stream_progress](https://library.sannybuilder.com/#/sa/audio/2508))** - new [DebugUtils](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/DebugUtils) plugin - new opcode **00C3 ([debug_on](https://library.sannybuilder.com/#/sa/debug/00C3))** - new opcode **00C4 ([debug_off](https://library.sannybuilder.com/#/sa/debug/00C4))** From 3fc4b36ed692336c59428d212b0eb6ec21365919 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Tue, 27 Feb 2024 02:50:08 +0100 Subject: [PATCH 097/216] add_dynamic_GXT_entry opcode bug fix (#73) * add_dynamic_GXT_entry opcode bug fix * fixup! add_dynamic_GXT_entry opcode bug fix --- cleo_sdk/CLEO.h | 2 +- cleo_sdk/CLEO_Utils.h | 2 +- source/CCustomOpcodeSystem.cpp | 6 +++--- source/CCustomOpcodeSystem.h | 2 +- source/CScriptEngine.h | 3 ++- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index 0d10c0d7..ec121020 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -490,7 +490,7 @@ SCRIPT_VAR* WINAPI CLEO_GetPointerToScriptVariable(CRunningScript* thread); // g void WINAPI CLEO_RetrieveOpcodeParams(CRunningScript* thread, int count); // read multiple params. 
Stored in opcodeParams array DWORD WINAPI CLEO_GetIntOpcodeParam(CRunningScript* thread); float WINAPI CLEO_GetFloatOpcodeParam(CRunningScript* thread); -LPSTR WINAPI CLEO_ReadStringOpcodeParam(CRunningScript* thread, char* buf = nullptr, int bufSize = 0); // returns nullptr on fail +LPSTR WINAPI CLEO_ReadStringOpcodeParam(CRunningScript* thread, char* buf = nullptr, int bufSize = 0); // returns null terminated string, nullptr on fail LPSTR WINAPI CLEO_ReadStringPointerOpcodeParam(CRunningScript* thread, char* buf = nullptr, int bufSize = 0); // exactly same as CLEO_ReadStringOpcodeParam void WINAPI CLEO_ReadStringParamWriteBuffer(CRunningScript* thread, char** outBuf, int* outBufSize, DWORD* outNeedsTerminator); // get info about the string opcode param, so it can be written latter. If outNeedsTerminator is not 0 then whole bufSize can be used as text characters. Advances script to next param char* WINAPI CLEO_ReadParamsFormatted(CRunningScript* thread, const char* format, char* buf = nullptr, int bufSize = 0); // consumes all var-arg params and terminator diff --git a/cleo_sdk/CLEO_Utils.h b/cleo_sdk/CLEO_Utils.h index 78a81036..a4198ded 100644 --- a/cleo_sdk/CLEO_Utils.h +++ b/cleo_sdk/CLEO_Utils.h @@ -32,7 +32,7 @@ namespace CLEO OPCODE_READ_PARAM_UINT() OPCODE_READ_PARAM_FLOAT() OPCODE_READ_PARAM_STRING() // returns char* to internal buffer. It might be overwritten by another string read! - OPCODE_READ_PARAM_STRING_BUFF(_buffer, _bufferSize) + OPCODE_READ_PARAM_STRING_BUFF(_buffer, _bufferSize) // always null terminated OPCODE_READ_PARAM_FILEPATH() // returns char* to internal buffer. It might be overwritten by another string read! 
OPCODE_READ_PARAM_PTR() // read and validate memory address argument OPCODE_READ_PARAM_OBJECT_HANDLE() diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 3954ea45..ccaba9d7 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -399,7 +399,7 @@ namespace CLEO return thread; } - // read string parameter according to convention on strings + // read string parameter according to convention on strings. Always null terminated char* ReadStringParam(CRunningScript *thread, char* buf, DWORD bufSize) { static char internal_buf[MAX_STR_LEN]; @@ -1551,8 +1551,8 @@ namespace CLEO //0ADF=2,add_dynamic_GXT_entry %1d% text %2d% OpcodeResult __stdcall opcode_0ADF(CRunningScript *thread) { - char gxtLabel[8] = { 0 }; // 7 + terminator character - auto gxt = OPCODE_READ_PARAM_STRING_BUFF(gxtLabel, 7); + char gxtBuff[8]; // 7 + terminator character + auto gxt = OPCODE_READ_PARAM_STRING_BUFF(gxtBuff, sizeof(gxtBuff)); auto txt = OPCODE_READ_PARAM_STRING(); GetInstance().TextManager.AddFxt(gxt, txt); diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index 8e56a616..705fdb11 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -67,7 +67,7 @@ namespace CLEO bool needTerminator = false; }; - char* ReadStringParam(CRunningScript* thread, char* buf = nullptr, DWORD bufSize = 0); + char* ReadStringParam(CRunningScript* thread, char* buf = nullptr, DWORD bufSize = 0); // null terminated StringParamBufferInfo GetStringParamWriteBuffer(CRunningScript* thread); // consumes the param int ReadFormattedString(CRunningScript* thread, char* buf, DWORD bufSize, const char* format); diff --git a/source/CScriptEngine.h b/source/CScriptEngine.h index 6364440b..05df03b3 100644 --- a/source/CScriptEngine.h +++ b/source/CScriptEngine.h @@ -156,7 +156,8 @@ namespace CLEO extern SCRIPT_VAR * (__thiscall * GetScriptParamPointer1)(CRunningScript *); extern SCRIPT_VAR * (__thiscall * 
GetScriptParamPointer2)(CRunningScript *, int __unused__); - char* __fastcall GetScriptStringParam(CRunningScript* thread, int dummy, char* buff, int buffLen); + // reimplemented hook of original game's procedure. Null terminator ommited if not enought space in the buffer! + char* __fastcall GetScriptStringParam(CRunningScript* thread, int dummy, char* buff, int buffLen); inline SCRIPT_VAR * GetScriptParamPointer(CRunningScript *thread) { From 17684972f19426148a0f3fe4da2dd3693d799e8b Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Tue, 27 Feb 2024 18:55:51 +0100 Subject: [PATCH 098/216] ReadStringParam bug fix (#74) --- source/CCustomOpcodeSystem.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index ccaba9d7..6c33dc61 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -404,9 +404,11 @@ namespace CLEO { static char internal_buf[MAX_STR_LEN]; if (!buf) { buf = internal_buf; bufSize = MAX_STR_LEN; } - int bufLength = (int)bufSize - 1; // max text length (minus terminator char), -1 for unknown - return CLEO::GetScriptStringParam(thread, 0, buf, bufLength); + if (bufSize > 0) buf[bufSize - 1] = '\0'; // buffer always terminated + if (bufSize <= 1) return buf; // no characters to read, done + + return GetScriptStringParam(thread, 0, buf, bufSize - 1); // do not overwrite buffer terminator } // write output\result string parameter From 274a7fae1cfa6d82e7d06752e347d8988747bca1 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Tue, 27 Feb 2024 18:56:12 +0100 Subject: [PATCH 099/216] fix audio plugin file extension (#75) * fix audio plugin file extension * Restored support for null audio streams. 
--- cleo_plugins/Audio/Audio.cpp | 118 ++++++++++++++++------------ cleo_plugins/Audio/Audio.vcxproj | 4 +- cleo_plugins/Audio/CAudioStream.cpp | 4 + 3 files changed, 75 insertions(+), 51 deletions(-) diff --git a/cleo_plugins/Audio/Audio.cpp b/cleo_plugins/Audio/Audio.cpp index d5009a9d..3f82aae0 100644 --- a/cleo_plugins/Audio/Audio.cpp +++ b/cleo_plugins/Audio/Audio.cpp @@ -8,7 +8,7 @@ using namespace CLEO; using namespace plugin; -#define VALIDATE_STREAM() if(!soundSystem.HasStream(stream)) { SHOW_ERROR("Invalid or already closed '0x%X' audio stream handle param in script %s \nScript suspended.", stream, ScriptInfoStr(thread).c_str()); return thread->Suspend(); } +#define VALIDATE_STREAM() if(stream != nullptr && !soundSystem.HasStream(stream)) { SHOW_ERROR("Invalid or already closed '0x%X' audio stream handle param in script %s \nScript suspended.", stream, ScriptInfoStr(thread).c_str()); return thread->Suspend(); } class Audio { @@ -106,7 +106,7 @@ class Audio //0AAD=2,set_audiostream %1d% perform_action %2d% static OpcodeResult __stdcall opcode_0AAD(CScriptThread* thread) { - auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); VALIDATE_STREAM() + auto stream = (CAudioStream*)OPCODE_READ_PARAM_UINT(); VALIDATE_STREAM() auto action = OPCODE_READ_PARAM_INT(); if (stream) @@ -128,9 +128,9 @@ class Audio //0AAE=1,release_audiostream %1d% static OpcodeResult __stdcall opcode_0AAE(CScriptThread* thread) { - auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + auto stream = (CAudioStream*)OPCODE_READ_PARAM_UINT(); VALIDATE_STREAM(); - soundSystem.DestroyStream(stream); + if (stream) soundSystem.DestroyStream(stream); return OR_CONTINUE; } @@ -138,9 +138,10 @@ class Audio //0AAF=2,%2d% = get_audiostream_length %1d% static OpcodeResult __stdcall opcode_0AAF(CScriptThread* thread) { - auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + auto stream = (CAudioStream*)OPCODE_READ_PARAM_UINT(); 
VALIDATE_STREAM(); - auto length = stream->GetLength(); + auto length = 0.0f; + if (stream) length = stream->GetLength(); OPCODE_WRITE_PARAM_INT((int)length); return OR_CONTINUE; @@ -149,9 +150,10 @@ class Audio //0AB9=2,get_audio_stream_state %1d% store_to %2d% static OpcodeResult __stdcall opcode_0AB9(CScriptThread* thread) { - auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + auto stream = (CAudioStream*)OPCODE_READ_PARAM_UINT(); VALIDATE_STREAM(); - auto state = stream->GetState(); + auto state = CAudioStream::eStreamState::Stopped; + if (stream) state = stream->GetState(); OPCODE_WRITE_PARAM_INT(state); return OR_CONTINUE; @@ -160,9 +162,10 @@ class Audio //0ABB=2,%2d% = get_audio_stream_volume %1d% static OpcodeResult __stdcall opcode_0ABB(CScriptThread* thread) { - auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + auto stream = (CAudioStream*)OPCODE_READ_PARAM_UINT(); VALIDATE_STREAM(); - auto volume = stream->GetVolume(); + auto volume = 0.0f; + if (stream) volume = stream->GetVolume(); OPCODE_WRITE_PARAM_FLOAT(volume); return OR_CONTINUE; @@ -171,10 +174,10 @@ class Audio //0ABC=2,set_audiostream %1d% volume %2d% static OpcodeResult __stdcall opcode_0ABC(CScriptThread* thread) { - auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + auto stream = (CAudioStream*)OPCODE_READ_PARAM_UINT(); VALIDATE_STREAM(); auto volume = OPCODE_READ_PARAM_FLOAT(); - stream->SetVolume(volume); + if (stream) stream->SetVolume(volume); return OR_CONTINUE; } @@ -182,10 +185,10 @@ class Audio //0AC0=2,loop_audiostream %1d% flag %2d% static OpcodeResult __stdcall opcode_0AC0(CScriptThread* thread) { - auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + auto stream = (CAudioStream*)OPCODE_READ_PARAM_UINT(); VALIDATE_STREAM(); auto loop = OPCODE_READ_PARAM_BOOL(); - stream->SetLooping(loop); + if (stream) stream->SetLooping(loop); return OR_CONTINUE; } @@ -205,24 +208,28 @@ class Audio 
//0AC2=4,set_3d_audiostream %1d% position %2d% %3d% %4d% static OpcodeResult __stdcall opcode_0AC2(CScriptThread* thread) { - auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + auto stream = (CAudioStream*)OPCODE_READ_PARAM_UINT(); VALIDATE_STREAM(); CVector pos; pos.x = OPCODE_READ_PARAM_FLOAT(); pos.y = OPCODE_READ_PARAM_FLOAT(); pos.z = OPCODE_READ_PARAM_FLOAT(); - stream->Set3dPosition(pos); + if (stream) stream->Set3dPosition(pos); + return OR_CONTINUE; } //0AC3=2,link_3d_audiostream %1d% to_object %2d% static OpcodeResult __stdcall opcode_0AC3(CScriptThread* thread) { - auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + auto stream = (CAudioStream*)OPCODE_READ_PARAM_UINT(); VALIDATE_STREAM(); auto handle = OPCODE_READ_PARAM_OBJECT_HANDLE(); - auto object = CPools::GetObject(handle); - stream->Link(object); + if (stream) + { + auto object = CPools::GetObject(handle); + stream->Link(object); + } return OR_CONTINUE; } @@ -230,11 +237,14 @@ class Audio //0AC4=2,link_3d_audiostream %1d% to_actor %2d% static OpcodeResult __stdcall opcode_0AC4(CScriptThread* thread) { - auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + auto stream = (CAudioStream*)OPCODE_READ_PARAM_UINT(); VALIDATE_STREAM(); auto handle = OPCODE_READ_PARAM_PED_HANDLE(); - auto ped = CPools::GetPed(handle); - stream->Link(ped); + if (stream) + { + auto ped = CPools::GetPed(handle); + stream->Link(ped); + } return OR_CONTINUE; } @@ -242,11 +252,14 @@ class Audio //0AC5=2,link_3d_audiostream %1d% to_vehicle %2d% static OpcodeResult __stdcall opcode_0AC5(CScriptThread* thread) { - auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + auto stream = (CAudioStream*)OPCODE_READ_PARAM_UINT(); VALIDATE_STREAM(); auto handle = OPCODE_READ_PARAM_VEHICLE_HANDLE(); - auto vehicle = CPools::GetVehicle(handle); - stream->Link(vehicle); + if (stream) + { + auto vehicle = CPools::GetVehicle(handle); + stream->Link(vehicle); 
+ } return OR_CONTINUE; } @@ -254,26 +267,31 @@ class Audio //2500=1, is_audio_stream_playing %1d% static OpcodeResult __stdcall opcode_2500(CScriptThread* thread) { - auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + auto stream = (CAudioStream*)OPCODE_READ_PARAM_UINT(); VALIDATE_STREAM(); - auto state = stream->GetState(); + auto state = CAudioStream::eStreamState::Stopped; + if (stream) state = stream->GetState(); - OPCODE_CONDITION_RESULT(state == 1); + OPCODE_CONDITION_RESULT(state == CAudioStream::eStreamState::Playing); return OR_CONTINUE; } //2501=2,%2d% = get_audiostream_duration %1d% static OpcodeResult __stdcall opcode_2501(CScriptThread* thread) { - auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + auto stream = (CAudioStream*)OPCODE_READ_PARAM_UINT(); VALIDATE_STREAM(); - auto length = stream->GetLength(); + auto length = 0.0f; + if (stream) + { + auto length = stream->GetLength(); - auto speed = stream->GetSpeed(); - if (speed <= 0.0f) - length = FLT_MAX; // it would take forever to play paused - else - length /= speed; // speed corrected + auto speed = stream->GetSpeed(); + if (speed <= 0.0f) + length = FLT_MAX; // it would take forever to play paused + else + length /= speed; // speed corrected + } OPCODE_WRITE_PARAM_FLOAT(length); return OR_CONTINUE; @@ -282,9 +300,10 @@ class Audio //2502=2,get_audio_stream_speed %1d% store_to %2d% static OpcodeResult __stdcall opcode_2502(CScriptThread* thread) { - auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + auto stream = (CAudioStream*)OPCODE_READ_PARAM_UINT(); VALIDATE_STREAM(); - auto speed = stream->GetSpeed(); + auto speed = 0.0f; + if (stream) speed = stream->GetSpeed(); OPCODE_WRITE_PARAM_FLOAT(speed); return OR_CONTINUE; @@ -293,10 +312,10 @@ class Audio //2503=2,set_audio_stream_speed %1d% speed %2d% static OpcodeResult __stdcall opcode_2503(CScriptThread* thread) { - auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); 
VALIDATE_STREAM(); + auto stream = (CAudioStream*)OPCODE_READ_PARAM_UINT(); VALIDATE_STREAM(); auto speed = OPCODE_READ_PARAM_FLOAT(); - stream->SetSpeed(speed); + if (stream) stream->SetSpeed(speed); return OR_CONTINUE; } @@ -304,11 +323,11 @@ class Audio //2504=3,set_audio_stream_volume_with_transition %1d% volume %2d% time_ms %2d% static OpcodeResult __stdcall opcode_2504(CScriptThread* thread) { - auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + auto stream = (CAudioStream*)OPCODE_READ_PARAM_UINT(); VALIDATE_STREAM(); auto volume = OPCODE_READ_PARAM_FLOAT(); auto time = OPCODE_READ_PARAM_INT(); - stream->SetVolume(volume, 0.001f * time); + if (stream) stream->SetVolume(volume, 0.001f * time); return OR_CONTINUE; } @@ -316,11 +335,11 @@ class Audio //2505=3,set_audio_stream_speed_with_transition %1d% speed %2d% time_ms %2d% static OpcodeResult __stdcall opcode_2505(CScriptThread* thread) { - auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + auto stream = (CAudioStream*)OPCODE_READ_PARAM_UINT(); VALIDATE_STREAM(); auto speed = OPCODE_READ_PARAM_FLOAT(); auto time = OPCODE_READ_PARAM_INT(); - stream->SetSpeed(speed, 0.001f * time); + if (stream) stream->SetSpeed(speed, 0.001f * time); return OR_CONTINUE; } @@ -328,10 +347,10 @@ class Audio //2506=2,set_audio_stream_source_size %1d% radius %2d% static OpcodeResult __stdcall opcode_2506(CScriptThread* thread) { - auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + auto stream = (CAudioStream*)OPCODE_READ_PARAM_UINT(); VALIDATE_STREAM(); auto radius = OPCODE_READ_PARAM_FLOAT(); - stream->Set3dSize(radius); + if (stream) stream->Set3dSize(radius); return OR_CONTINUE; } @@ -339,9 +358,10 @@ class Audio //2507=2,get_audio_stream_progress %1d% store_to %2d% static OpcodeResult __stdcall opcode_2507(CScriptThread* thread) { - auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + auto stream = 
(CAudioStream*)OPCODE_READ_PARAM_UINT(); VALIDATE_STREAM(); - auto progress = stream->GetProgress(); + auto progress = 0.0f; + if (stream) progress = stream->GetProgress(); OPCODE_WRITE_PARAM_FLOAT(progress); return OR_CONTINUE; @@ -350,10 +370,10 @@ class Audio //2508=2,set_audio_stream_progress %1d% speed %2d% static OpcodeResult __stdcall opcode_2508(CScriptThread* thread) { - auto stream = (CAudioStream*)OPCODE_READ_PARAM_PTR(); VALIDATE_STREAM(); + auto stream = (CAudioStream*)OPCODE_READ_PARAM_UINT(); VALIDATE_STREAM(); auto speed = OPCODE_READ_PARAM_FLOAT(); - stream->SetProgress(speed); + if (stream) stream->SetProgress(speed); return OR_CONTINUE; } diff --git a/cleo_plugins/Audio/Audio.vcxproj b/cleo_plugins/Audio/Audio.vcxproj index ca1788db..c57e364c 100644 --- a/cleo_plugins/Audio/Audio.vcxproj +++ b/cleo_plugins/Audio/Audio.vcxproj @@ -46,14 +46,14 @@ $(SolutionDir).output\ $(ProjectDir).obj\$(Configuration)\ Audio - .cleo5 + .cleo $(ProjectDir)bass;$(LibraryPath) $(SolutionDir).output\ $(ProjectDir).obj\$(Configuration)\ Audio - .cleo5 + .cleo $(ProjectDir)bass;$(LibraryPath) diff --git a/cleo_plugins/Audio/CAudioStream.cpp b/cleo_plugins/Audio/CAudioStream.cpp index 74927334..24c46b2b 100644 --- a/cleo_plugins/Audio/CAudioStream.cpp +++ b/cleo_plugins/Audio/CAudioStream.cpp @@ -44,6 +44,10 @@ void CAudioStream::Stop() { BASS_ChannelPause(streamInternal); state = Stopped; + + // cancel ongoing transitions + speed = speedTarget; + volume = volumeTarget; } void CAudioStream::Resume() From ac40fa1de147bfd93c4bf073c261f96a6ab8355d Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Wed, 28 Feb 2024 00:15:00 +0100 Subject: [PATCH 100/216] opcode 0AA4 fix (#78) --- cleo_plugins/MemoryOperations/MemoryOperations.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/cleo_plugins/MemoryOperations/MemoryOperations.cpp b/cleo_plugins/MemoryOperations/MemoryOperations.cpp index 58a90f7d..f90aad84 100644 --- 
a/cleo_plugins/MemoryOperations/MemoryOperations.cpp +++ b/cleo_plugins/MemoryOperations/MemoryOperations.cpp @@ -394,14 +394,7 @@ class MemoryOperations auto name = OPCODE_READ_PARAM_STRING(); auto ptr = (HMODULE)OPCODE_READ_PARAM_PTR(); - // validate - if (m_libraries.find(ptr) == m_libraries.end()) - { - LOG_WARNING(thread, "Invalid '0x%X' pointer param to unknown or freed library in script %s", ptr, ScriptInfoStr(thread).c_str()); - OPCODE_WRITE_PARAM_PTR(nullptr); - OPCODE_CONDITION_RESULT(false); - return OR_CONTINUE; - } + // allow any pointer, not just from 0AA2 auto funcPtr = (void*)GetProcAddress(ptr, name); From e6ab655c46ac4481484866ef6b4dbf4b9615b276 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Wed, 28 Feb 2024 01:47:00 +0100 Subject: [PATCH 101/216] Allow literal '0' as float parameter when calling opcodes (#77) * Allow literal '0' as float parameter when calling opcodes * fixup! Allow literal '0' as float parameter when calling opcodes * Fix. * fixup! Fix. * Relaxed type checking for float opcode parameters in legacy scripts. 
--- cleo_sdk/CLEO_Utils.h | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/cleo_sdk/CLEO_Utils.h b/cleo_sdk/CLEO_Utils.h index a4198ded..5e8df6c5 100644 --- a/cleo_sdk/CLEO_Utils.h +++ b/cleo_sdk/CLEO_Utils.h @@ -54,6 +54,11 @@ namespace CLEO OPCODE_WRITE_PARAM_STRING(value) OPCODE_WRITE_PARAM_PTR(value) // memory address */ + + static bool IsLegacyScript(CLEO::CRunningScript* thread) + { + return CLEO_GetScriptVersion(thread) < CLEO_VER_5; + } // this plugin's config file static std::string GetConfigFilename() @@ -214,6 +219,22 @@ namespace CLEO return _paramsArray[0]; } + static SCRIPT_VAR& _readParamFloat(CRunningScript* thread) + { + auto& var = _readParam(thread); + + // people tend to use '0' instead '0.0' when providing literal float params in scripts + // binary these are equal, so can be allowed + if (var.dwParam == 0) + { + // pretend it was float type + if (IsImmInteger(_lastParamType)) _lastParamType = eDataType::DT_FLOAT; + if (_lastParamArrayType == eArrayDataType::ADT_INT) _lastParamArrayType = eArrayDataType::ADT_FLOAT; + } + + return var; + } + static SCRIPT_VAR* _readParamVariable(CRunningScript* thread) { _lastParamType = thread->PeekDataType(); @@ -402,8 +423,8 @@ namespace CLEO #define OPCODE_READ_PARAM_UINT() _readParam(thread).dwParam; \ if (!_paramWasInt()) { SHOW_ERROR("Input argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } - #define OPCODE_READ_PARAM_FLOAT() _readParam(thread).fParam; \ - if (!_paramWasFloat()) { SHOW_ERROR("Input argument #%d expected to be float, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + #define OPCODE_READ_PARAM_FLOAT() 
_readParamFloat(thread).fParam; \ + if (!IsLegacyScript(thread) && !_paramWasFloat()) { SHOW_ERROR("Input argument #%d expected to be float, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_READ_PARAM_STRING() _readParamText(thread); if(!_paramWasString()) { return OpcodeResult::OR_INTERRUPT; } @@ -436,7 +457,7 @@ namespace CLEO #define OPCODE_READ_PARAM_OUTPUT_VAR_FLOAT() _readParamVariable(thread); \ if (!_paramWasVariable()) { SHOW_ERROR("Output argument #%d expected to be variable, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ - if (!_paramWasFloat(true)) { SHOW_ERROR("Output argument #%d expected to be variable float, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!IsLegacyScript(thread) && !_paramWasFloat(true)) { SHOW_ERROR("Output argument #%d expected to be variable float, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } // macros for writing opcode output params. 
Performs type validation, throws error and suspends script if user provided invalid argument type @@ -462,7 +483,7 @@ namespace CLEO if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be variable int, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_WRITE_PARAM_FLOAT(value) _writeParam(thread, value); \ - if (!_paramWasFloat(true)) { SHOW_ERROR("Output argument #%d expected to be variable float, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!IsLegacyScript(thread) && !_paramWasFloat(true)) { SHOW_ERROR("Output argument #%d expected to be variable float, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_WRITE_PARAM_STRING(value) if(!_writeParamText(thread, value)) { return OpcodeResult::OR_INTERRUPT; } From 1175adc41b5576bfe5754f607c30474b8241a2c9 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Fri, 1 Mar 2024 02:59:23 +0100 Subject: [PATCH 102/216] Audio plugin name fixed (#80) --- cleo_plugins/Audio/Audio.vcxproj | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cleo_plugins/Audio/Audio.vcxproj b/cleo_plugins/Audio/Audio.vcxproj index c57e364c..39afc1b1 100644 --- a/cleo_plugins/Audio/Audio.vcxproj +++ b/cleo_plugins/Audio/Audio.vcxproj @@ -45,14 +45,14 @@ $(SolutionDir).output\ $(ProjectDir).obj\$(Configuration)\ - Audio + SA.Audio .cleo $(ProjectDir)bass;$(LibraryPath) $(SolutionDir).output\ $(ProjectDir).obj\$(Configuration)\ - Audio + SA.Audio .cleo $(ProjectDir)bass;$(LibraryPath) From 01a91a811694d89fdd456c1f6377761e1b8f69ce Mon Sep 17 00:00:00 2001 From: MiranDMC 
Date: Fri, 1 Mar 2024 06:33:39 +0100 Subject: [PATCH 103/216] Filesystem unit tests updated. (#81) --- tests/cleo_tests/FilesystemOperations/0A9A.s | Bin 359 -> 354 bytes tests/cleo_tests/FilesystemOperations/0A9A.txt | 2 +- tests/cleo_tests/FilesystemOperations/0A9B.s | Bin 458 -> 448 bytes tests/cleo_tests/FilesystemOperations/0A9B.txt | 4 ++-- tests/cleo_tests/FilesystemOperations/0A9C.txt | 2 +- 5 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/cleo_tests/FilesystemOperations/0A9A.s b/tests/cleo_tests/FilesystemOperations/0A9A.s index 9e4f0e55ca874bc54e6dcfb0506d34f1535e7891..d74a59dfddd12f95df2a3a8a92a80ac0cc210c0b 100644 GIT binary patch delta 41 ucmaFP^oVJ~SxLU+oYednJrJpvlb_DVSj5c0;LE`HXyR)V#s!lV7~KIg{SA@; delta 46 zcmaFF^qgtJSvle4oYednJrEgRl3HA%my@5)$5_P7z~IZkcz5D66UNz-r5W7;kE0K# diff --git a/tests/cleo_tests/FilesystemOperations/0A9A.txt b/tests/cleo_tests/FilesystemOperations/0A9A.txt index 1db554b7..bf7c5b1e 100644 --- a/tests/cleo_tests/FilesystemOperations/0A9A.txt +++ b/tests/cleo_tests/FilesystemOperations/0A9A.txt @@ -36,7 +36,7 @@ wait 0 // try open non existing file if // test 0A9A - 0@ = open_file "cleo\.cleo_test.log" {mode} "r" // tested opcode + 0@ = open_file "cleo\.cleo.log" {mode} "r" // tested opcode then trace "~g~~h~~h~0A9A (open_file), #1 PASSED" close_file 0@ diff --git a/tests/cleo_tests/FilesystemOperations/0A9B.s b/tests/cleo_tests/FilesystemOperations/0A9B.s index d9917f8681a2bf217f3df80e983bcf725919e7b0..5ae25f65ea934d9f54eae35fdb4fec28097fd13b 100644 GIT binary patch delta 88 zcmX@be1Lg^wlrUIPHKLP9*ES-$xr8FD$-_VVDM#NZ2v!5o>6&X9uJet|H%Q2Y7-a5 TO5s$zW3n8h0b}W8f5vJ6t!^33 delta 118 zcmX@We2RI3wuW$WPHKLP9*B%DNi8nX%gImYV=B^SW?=ATU~KyT|39N5pK4wD Date: Fri, 1 Mar 2024 10:38:57 +0100 Subject: [PATCH 104/216] Opcode string params handling tweaked (#76) * Restored original behavior of CLEO_ReadStringPointerOpcodeParam sdk export. 
Restored separated internal buffers for CLEO_ReadStringOpcodeParam and CLEO_ReadStringPointerOpcodeParam sdk exports. Updated opcode param string read util to reflect old CLEO general approach and try to use raw source text data whenever possible. Optimized out unnecessary text copying to internal buffer. * fixup! Restored original behavior of CLEO_ReadStringPointerOpcodeParam sdk export. * Utility macro comments updated. * Return types updated. * Read string macros reworked to create unique buffer on every call. * fixup! Read string macros reworked to create unique buffer on every call. * fixup! Read string macros reworked to create unique buffer on every call. * Review fixes. * fixup! Review fixes. * Updates. * Fix. * OPCODE_READ_PARAM_FILEPATH comment updated. * Fix * ReadStringParam update. * Simplification. * Double copy fixed. * Fixes. * Review fixes. --- CHANGELOG.md | 1 + cleo_plugins/Audio/Audio.cpp | 4 +- .../FileSystemOperations.cpp | 69 ++++----- cleo_plugins/IniFiles/IniFiles.cpp | 44 +++--- .../MemoryOperations/MemoryOperations.cpp | 16 +- cleo_sdk/CLEO.h | 11 +- cleo_sdk/CLEO_Utils.h | 34 ++-- source/CCustomOpcodeSystem.cpp | 146 ++++++++++-------- source/CCustomOpcodeSystem.h | 6 +- source/CScriptEngine.cpp | 12 +- source/CScriptEngine.h | 6 +- source/cleo.def | 11 +- 12 files changed, 181 insertions(+), 179 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 371a67a8..30f56264 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -81,6 +81,7 @@ - new SDK method: CLEO_GetVarArgCount - new SDK method: CLEO_PeekIntOpcodeParam - new SDK method: CLEO_PeekFloatOpcodeParam +- new SDK method: CLEO_PeekPointerToScriptVariable - new SDK method: CLEO_SkipUnusedVarArgs - new SDK method: CLEO_ReadParamsFormatted - new SDK method: CLEO_ReadStringParamWriteBuffer diff --git a/cleo_plugins/Audio/Audio.cpp b/cleo_plugins/Audio/Audio.cpp index 3f82aae0..e7810ffd 100644 --- a/cleo_plugins/Audio/Audio.cpp +++ b/cleo_plugins/Audio/Audio.cpp @@ -94,7 +94,7 @@ class
Audio //0AAC=2, %2d% = load_audiostream %1d% // IF and SET static OpcodeResult __stdcall opcode_0AAC(CScriptThread* thread) { - auto path = OPCODE_READ_PARAM_FILEPATH(); + OPCODE_READ_PARAM_FILEPATH(path); auto ptr = soundSystem.CreateStream(path); @@ -196,7 +196,7 @@ class Audio //0AC1=2,%2d% = load_audiostream_with_3d_support %1d% //IF and SET static OpcodeResult __stdcall opcode_0AC1(CScriptThread* thread) { - auto path = OPCODE_READ_PARAM_FILEPATH(); + OPCODE_READ_PARAM_FILEPATH(path); auto ptr = soundSystem.CreateStream(path, true); diff --git a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp index 721eba3b..15e08b9c 100644 --- a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp @@ -99,13 +99,11 @@ class FileSystemOperations LOG_WARNING(0, "Value (%d) not known by opcode [0A99] in script %s", idx, ScriptInfoStr(thread).c_str()); return OR_CONTINUE; } - - CLEO_SetScriptWorkDir(thread, path); - return OR_CONTINUE; } else { - path = OPCODE_READ_PARAM_STRING(); + OPCODE_READ_PARAM_STRING(str); + path = str; } CLEO_SetScriptWorkDir(thread, path); @@ -115,7 +113,7 @@ class FileSystemOperations //0A9A=3,%3d% = openfile %1d% mode %2d% // IF and SET static OpcodeResult WINAPI opcode_0A9A(CRunningScript* thread) { - auto filename = OPCODE_READ_PARAM_FILEPATH(); + OPCODE_READ_PARAM_FILEPATH(filename); char mode[16]; auto paramType = CLEO_GetOperandType(thread); @@ -132,7 +130,8 @@ class FileSystemOperations } else { - OPCODE_READ_PARAM_STRING_BUFF(mode, sizeof(mode)); + OPCODE_READ_PARAM_STRING_LEN(strMode, sizeof(mode) - 1); // leave space for terminator char + strcpy(mode, strMode); } // either CLEO 3 or CLEO 4 made a big mistake! 
(they differ in one major unapparent preference) @@ -252,7 +251,7 @@ class FileSystemOperations // 0AAB=1, does_file_exist %1s% static OpcodeResult WINAPI Script_FS_FileExists(CRunningScript* thread) { - auto filename = OPCODE_READ_PARAM_FILEPATH(); + OPCODE_READ_PARAM_FILEPATH(filename); DWORD fAttr = GetFileAttributes(filename); bool exists = (fAttr != INVALID_FILE_ATTRIBUTES) && !(fAttr & FILE_ATTRIBUTE_DIRECTORY); @@ -337,7 +336,7 @@ class FileSystemOperations static OpcodeResult WINAPI opcode_0AD8(CRunningScript* thread) { auto handle = READ_FILE_HANDLE_PARAM(); - auto text = OPCODE_READ_PARAM_STRING(); + OPCODE_READ_PARAM_STRING(text); auto ok = File::writeString(handle, text); if (!ok) @@ -355,7 +354,7 @@ class FileSystemOperations static OpcodeResult WINAPI opcode_0AD9(CRunningScript* thread) { auto handle = READ_FILE_HANDLE_PARAM(); - auto format = OPCODE_READ_PARAM_STRING(); + OPCODE_READ_PARAM_STRING(format); static char text[4 * MAX_STR_LEN]; CLEO_ReadParamsFormatted(thread, format, text, MAX_STR_LEN); auto ok = File::writeString(handle, text); @@ -372,7 +371,7 @@ class FileSystemOperations static OpcodeResult WINAPI opcode_0ADA(CRunningScript* thread) { auto handle = READ_FILE_HANDLE_PARAM(); - auto format = OPCODE_READ_PARAM_STRING(); + OPCODE_READ_PARAM_STRING(format); auto result = OPCODE_READ_PARAM_OUTPUT_VAR(); size_t paramCount = 0; @@ -394,7 +393,7 @@ class FileSystemOperations // 0AE4=1, directory_exist %1s% static OpcodeResult WINAPI Script_FS_DirectoryExists(CRunningScript* thread) { - auto filename = OPCODE_READ_PARAM_FILEPATH(); + OPCODE_READ_PARAM_FILEPATH(filename); DWORD fAttr = GetFileAttributes(filename); bool exists = (fAttr != INVALID_FILE_ATTRIBUTES) && (fAttr & FILE_ATTRIBUTE_DIRECTORY); @@ -406,7 +405,7 @@ class FileSystemOperations // 0AE5=1, create_directory %1s% //IF and SET static OpcodeResult WINAPI Script_FS_CreateDirectory(CRunningScript* thread) { - auto filename = OPCODE_READ_PARAM_FILEPATH(); + 
OPCODE_READ_PARAM_FILEPATH(filename); bool result = CreateDirectory(filename, NULL) != 0; @@ -417,7 +416,7 @@ class FileSystemOperations // 0AE6=3, %2d% = find_first_file %1s% get_filename_to %3s% //IF and SET static OpcodeResult WINAPI Script_FS_FindFirstFile(CRunningScript* thread) { - auto filename = OPCODE_READ_PARAM_FILEPATH(); + OPCODE_READ_PARAM_FILEPATH(filename); WIN32_FIND_DATA ffd = { 0 }; HANDLE handle = FindFirstFile(filename, &ffd); @@ -483,7 +482,7 @@ class FileSystemOperations // 0B00=1, delete_file %1s% //IF and SET static OpcodeResult WINAPI Script_FS_DeleteFile(CScriptThread* thread) { - auto filename = OPCODE_READ_PARAM_FILEPATH(); + OPCODE_READ_PARAM_FILEPATH(filename); auto success = DeleteFile(filename); @@ -539,7 +538,7 @@ class FileSystemOperations // 0B01=1, delete_directory %1s% with_all_files_and_subdirectories %2d% //IF and SET static OpcodeResult WINAPI Script_FS_DeleteDirectory(CScriptThread* thread) { - auto filename = OPCODE_READ_PARAM_FILEPATH(); + OPCODE_READ_PARAM_FILEPATH(filename); auto deleteContents = OPCODE_READ_PARAM_BOOL(); BOOL result; @@ -561,14 +560,12 @@ class FileSystemOperations // 0B02=2, move_file %1s% to %2s% //IF and SET static OpcodeResult WINAPI Script_FS_MoveFile(CScriptThread* thread) { - auto tmpStr = OPCODE_READ_PARAM_FILEPATH(); - auto filepath = std::string(tmpStr); // store before reusing buffer - - auto newFilepath = OPCODE_READ_PARAM_FILEPATH(); + OPCODE_READ_PARAM_FILEPATH(filepath); + OPCODE_READ_PARAM_FILEPATH(newFilepath); - BOOL result = GetFileAttributes(filepath.c_str()) & FILE_ATTRIBUTE_DIRECTORY; + BOOL result = GetFileAttributes(filepath) & FILE_ATTRIBUTE_DIRECTORY; if (!result) - result = MoveFile(filepath.c_str(), newFilepath); + result = MoveFile(filepath, newFilepath); OPCODE_CONDITION_RESULT(result); return OR_CONTINUE; @@ -577,14 +574,12 @@ class FileSystemOperations // 0B03=2, move_directory %1s% to %2s% //IF and SET static OpcodeResult WINAPI Script_FS_MoveDir(CScriptThread* thread) { 
- auto tmpStr = OPCODE_READ_PARAM_FILEPATH(); - auto filepath = std::string(tmpStr); // store before reusing buffer + OPCODE_READ_PARAM_FILEPATH(filepath); + OPCODE_READ_PARAM_FILEPATH(newFilepath); - auto newFilepath = OPCODE_READ_PARAM_FILEPATH(); - - BOOL result = GetFileAttributes(filepath.c_str()) & FILE_ATTRIBUTE_DIRECTORY; + BOOL result = GetFileAttributes(filepath) & FILE_ATTRIBUTE_DIRECTORY; if (result) - result = MoveFile(filepath.c_str(), newFilepath); + result = MoveFile(filepath, newFilepath); OPCODE_CONDITION_RESULT(result); return OR_CONTINUE; @@ -593,16 +588,14 @@ class FileSystemOperations // 0B04=2, copy_file %1s% to %2s% //IF and SET static OpcodeResult WINAPI Script_FS_CopyFile(CScriptThread* thread) { - auto tmpStr = OPCODE_READ_PARAM_FILEPATH(); - auto filepath = std::string(tmpStr); // store before reusing buffer - - auto newFilepath = OPCODE_READ_PARAM_FILEPATH(); + OPCODE_READ_PARAM_FILEPATH(filepath); + OPCODE_READ_PARAM_FILEPATH(newFilepath); - BOOL result = CopyFile(filepath.c_str(), newFilepath, FALSE); + BOOL result = CopyFile(filepath, newFilepath, FALSE); if (result) { // copy file attributes - DWORD fattr = GetFileAttributes(filepath.c_str()); + DWORD fattr = GetFileAttributes(filepath); SetFileAttributes(newFilepath, fattr); } @@ -664,12 +657,10 @@ class FileSystemOperations // 0B05=2, copy_directory %1d% to %2d% //IF and SET static OpcodeResult WINAPI Script_FS_CopyDir(CScriptThread* thread) { - auto tmpStr = OPCODE_READ_PARAM_FILEPATH(); - auto filepath = std::string(tmpStr); // store before reusing buffer - - auto newFilepath = OPCODE_READ_PARAM_FILEPATH(); + OPCODE_READ_PARAM_FILEPATH(filepath); + OPCODE_READ_PARAM_FILEPATH(newFilepath); - BOOL result = CopyDir(filepath.c_str(), newFilepath); + BOOL result = CopyDir(filepath, newFilepath); OPCODE_CONDITION_RESULT(result); return OR_CONTINUE; @@ -720,7 +711,7 @@ class FileSystemOperations //2302=2,%2s% = resolve_filepath %1s% static OpcodeResult __stdcall 
opcode_2302(CRunningScript* thread) { - auto path = OPCODE_READ_PARAM_FILEPATH(); // it also resolves the path to absolute form + OPCODE_READ_PARAM_FILEPATH(path); // it also resolves the path to absolute form OPCODE_WRITE_PARAM_STRING(path); return OR_CONTINUE; diff --git a/cleo_plugins/IniFiles/IniFiles.cpp b/cleo_plugins/IniFiles/IniFiles.cpp index 0064d1e6..054c27f3 100644 --- a/cleo_plugins/IniFiles/IniFiles.cpp +++ b/cleo_plugins/IniFiles/IniFiles.cpp @@ -29,19 +29,15 @@ class IniFiles } } - // resused globals to cut down allocations - static char section[128]; - static char key[128]; - static OpcodeResult WINAPI Script_InifileGetInt(CScriptThread* thread) /**************************************************************** Opcode Format 0AF0=4,%4d% = get_int_from_ini_file %1s% section %2s% key %3s% ****************************************************************/ { - auto path = OPCODE_READ_PARAM_FILEPATH(); - OPCODE_READ_PARAM_STRING_BUFF(section, sizeof(section)); - OPCODE_READ_PARAM_STRING_BUFF(key, sizeof(key)); + OPCODE_READ_PARAM_FILEPATH(path); + OPCODE_READ_PARAM_STRING(section); + OPCODE_READ_PARAM_STRING(key); auto result = GetPrivateProfileInt(section, key, 0x80000000, path); @@ -57,9 +53,9 @@ class IniFiles ****************************************************************/ { auto value = OPCODE_READ_PARAM_INT(); - auto path = OPCODE_READ_PARAM_FILEPATH(); - OPCODE_READ_PARAM_STRING_BUFF(section, sizeof(section)); - OPCODE_READ_PARAM_STRING_BUFF(key, sizeof(key)); + OPCODE_READ_PARAM_FILEPATH(path); + OPCODE_READ_PARAM_STRING(section); + OPCODE_READ_PARAM_STRING(key); char strValue[32]; _itoa(value, strValue, 10); @@ -75,9 +71,9 @@ class IniFiles 0AF2=4,%4d% = get_float_from_ini_file %1s% section %2s% key %3s% ****************************************************************/ { - auto path = OPCODE_READ_PARAM_FILEPATH(); - OPCODE_READ_PARAM_STRING_BUFF(section, sizeof(section)); - OPCODE_READ_PARAM_STRING_BUFF(key, sizeof(key)); + 
OPCODE_READ_PARAM_FILEPATH(path); + OPCODE_READ_PARAM_STRING(section); + OPCODE_READ_PARAM_STRING(key); auto value = 0.0f; char strValue[32]; @@ -102,9 +98,9 @@ class IniFiles ****************************************************************/ { auto value = OPCODE_READ_PARAM_FLOAT(); - auto path = OPCODE_READ_PARAM_FILEPATH(); - OPCODE_READ_PARAM_STRING_BUFF(section, sizeof(section)); - OPCODE_READ_PARAM_STRING_BUFF(key, sizeof(key)); + OPCODE_READ_PARAM_FILEPATH(path); + OPCODE_READ_PARAM_STRING(section); + OPCODE_READ_PARAM_STRING(key); char strValue[32]; sprintf(strValue, "%g", value); @@ -120,9 +116,9 @@ class IniFiles 0AF4=4,%4d% = read_string_from_ini_file %1s% section %2s% key %3s% ****************************************************************/ { - auto path = OPCODE_READ_PARAM_FILEPATH(); - OPCODE_READ_PARAM_STRING_BUFF(section, sizeof(section)); - OPCODE_READ_PARAM_STRING_BUFF(key, sizeof(key)); + OPCODE_READ_PARAM_FILEPATH(path); + OPCODE_READ_PARAM_STRING(section); + OPCODE_READ_PARAM_STRING(key); char strValue[MAX_STR_LEN]; auto result = GetPrivateProfileString(section, key, NULL, strValue, sizeof(strValue), path); @@ -144,10 +140,10 @@ class IniFiles 0AF5=4,write_string %1s% to_ini_file %2s% section %3s% key %4s% ****************************************************************/ { - char strValue[MAX_STR_LEN]; OPCODE_READ_PARAM_STRING_BUFF(strValue, sizeof(strValue)); - auto path = OPCODE_READ_PARAM_FILEPATH(); - OPCODE_READ_PARAM_STRING_BUFF(section, sizeof(section)); - OPCODE_READ_PARAM_STRING_BUFF(key, sizeof(key)); + OPCODE_READ_PARAM_STRING(strValue); + OPCODE_READ_PARAM_FILEPATH(path); + OPCODE_READ_PARAM_STRING(section); + OPCODE_READ_PARAM_STRING(key); auto result = WritePrivateProfileString(section, key, strValue, path); @@ -156,5 +152,3 @@ class IniFiles } } iniFiles; -char IniFiles::section[128]; -char IniFiles::key[128]; diff --git a/cleo_plugins/MemoryOperations/MemoryOperations.cpp b/cleo_plugins/MemoryOperations/MemoryOperations.cpp 
index f90aad84..02a9160f 100644 --- a/cleo_plugins/MemoryOperations/MemoryOperations.cpp +++ b/cleo_plugins/MemoryOperations/MemoryOperations.cpp @@ -118,7 +118,9 @@ class MemoryOperations return thread->Suspend(); } - param.pcParam = OPCODE_READ_PARAM_STRING_BUFF(textParams[currTextParam], MAX_STR_LEN); + OPCODE_READ_PARAM_STRING_LEN(str, MAX_STR_LEN); + strcpy(textParams[currTextParam], str); + param.pcParam = textParams[currTextParam]; currTextParam++; } else if (IsImmInteger(paramType) || IsImmFloat(paramType) || IsVariable(paramType)) @@ -358,9 +360,9 @@ class MemoryOperations //0AA2=2, load_dynamic_library %1s% store_to %2d% // IF and SET static OpcodeResult __stdcall opcode_0AA2(CLEO::CRunningScript* thread) { - auto str = OPCODE_READ_PARAM_FILEPATH(); + OPCODE_READ_PARAM_FILEPATH(path); - auto ptr = LoadLibrary(str); + auto ptr = LoadLibrary(path); if (ptr != nullptr) { m_libraries.insert(ptr); @@ -391,7 +393,7 @@ class MemoryOperations //0AA4=3, get_proc_address %1d% library %2d% result %3d% // IF and SET static OpcodeResult __stdcall opcode_0AA4(CLEO::CRunningScript* thread) { - auto name = OPCODE_READ_PARAM_STRING(); + OPCODE_READ_PARAM_STRING(name); auto ptr = (HMODULE)OPCODE_READ_PARAM_PTR(); // allow any pointer, not just from 0AA2 @@ -448,7 +450,7 @@ class MemoryOperations //0AAA=2, get_script_struct_named %1d% pointer %2d% // IF and SET static OpcodeResult __stdcall opcode_0AAA(CLEO::CRunningScript *thread) { - auto name = OPCODE_READ_PARAM_STRING(); + OPCODE_READ_PARAM_STRING(name); auto ptr = CLEO_GetScriptByName(name, true, true, 0); @@ -711,7 +713,7 @@ class MemoryOperations } else if (IsImmString(valueType) || IsVarString(valueType)) { - auto str = OPCODE_READ_PARAM_STRING(); + OPCODE_READ_PARAM_STRING(str); auto len = (int)strlen(str); memcpy(ptr + offset, str, min(size, len)); @@ -768,7 +770,7 @@ class MemoryOperations //2406=1, get_script_struct_from_filename %1s% static OpcodeResult __stdcall opcode_2406(CLEO::CScriptThread* thread) { - 
auto filename = OPCODE_READ_PARAM_STRING(); + OPCODE_READ_PARAM_STRING(filename); auto address = CLEO_GetScriptByFilename(filename); diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index ec121020..d2b27f09 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -58,13 +58,13 @@ enum eDataType : BYTE DT_FLOAT, // literal float 32 DT_VAR_ARRAY, // globalArr $(,) DT_LVAR_ARRAY, // localArr @(,) - DT_TEXTLABEL, // literal sstring '' + DT_TEXTLABEL, // literal string up to 7 chars DT_VAR_TEXTLABEL, // globalVarSString s$ DT_LVAR_TEXTLABEL, // localVarSString @s DT_VAR_TEXTLABEL_ARRAY, // globalVarSStringArr s$(,) DT_LVAR_TEXTLABEL_ARRAY, // localVarSStringArr @s(,) DT_VARLEN_STRING, // literal vstring "" - DT_STRING, + DT_STRING, // literal string up to 15 chars DT_VAR_STRING, // globalVarVString v$ DT_LVAR_STRING, // localVarVString @v DT_VAR_STRING_ARRAY, // globalVarStringArr v$(,) @@ -486,17 +486,18 @@ SCRIPT_VAR* WINAPI CLEO_GetOpcodeParamsArray(); // get pointer to 'SCRIPT_VAR[32 BYTE WINAPI CLEO_GetParamsHandledCount(); // number of already read/written opcode parameters since current opcode handler was called // param read -SCRIPT_VAR* WINAPI CLEO_GetPointerToScriptVariable(CRunningScript* thread); // get pointer to the variable data. Advances script to next param +SCRIPT_VAR* WINAPI CLEO_GetPointerToScriptVariable(CRunningScript* thread); // get pointer to the variable's data, nullptr if parameter is not variable. Advances script to next param void WINAPI CLEO_RetrieveOpcodeParams(CRunningScript* thread, int count); // read multiple params. 
Stored in opcodeParams array DWORD WINAPI CLEO_GetIntOpcodeParam(CRunningScript* thread); float WINAPI CLEO_GetFloatOpcodeParam(CRunningScript* thread); -LPSTR WINAPI CLEO_ReadStringOpcodeParam(CRunningScript* thread, char* buf = nullptr, int bufSize = 0); // returns null terminated string, nullptr on fail -LPSTR WINAPI CLEO_ReadStringPointerOpcodeParam(CRunningScript* thread, char* buf = nullptr, int bufSize = 0); // exactly same as CLEO_ReadStringOpcodeParam +LPCSTR WINAPI CLEO_ReadStringOpcodeParam(CRunningScript* thread, char* buff = nullptr, int buffSize = 0); // read always null-terminated string into buffer, clamped to its size. If no buffer provided then internal, globally shared by all CLEO_ReadStringOpcodeParam calls, is used. Returns pointer to the result buffer or nullptr on fail +LPCSTR WINAPI CLEO_ReadStringPointerOpcodeParam(CRunningScript* thread, char* buff = nullptr, int buffSize = 0); // read always null-terminated string into buffer, clamped to its size. If no buffer provided then internal, globally shared by all CLEO_ReadStringPointerOpcodeParam calls, is used. WARNING: returned pointer may differ from buff and contain string longer than buffSize (ptr to original data source) void WINAPI CLEO_ReadStringParamWriteBuffer(CRunningScript* thread, char** outBuf, int* outBufSize, DWORD* outNeedsTerminator); // get info about the string opcode param, so it can be written latter. If outNeedsTerminator is not 0 then whole bufSize can be used as text characters. 
Advances script to next param char* WINAPI CLEO_ReadParamsFormatted(CRunningScript* thread, const char* format, char* buf = nullptr, int bufSize = 0); // consumes all var-arg params and terminator // get param value without advancing the script DWORD WINAPI CLEO_PeekIntOpcodeParam(CRunningScript* thread); float WINAPI CLEO_PeekFloatOpcodeParam(CRunningScript* thread); +SCRIPT_VAR* WINAPI CLEO_PeekPointerToScriptVariable(CRunningScript* thread); // get pointer to the variable's data, nullptr if parameter is not variable // param skip without reading void WINAPI CLEO_SkipOpcodeParams(CRunningScript* thread, int count); diff --git a/cleo_sdk/CLEO_Utils.h b/cleo_sdk/CLEO_Utils.h index 5e8df6c5..5fe37d42 100644 --- a/cleo_sdk/CLEO_Utils.h +++ b/cleo_sdk/CLEO_Utils.h @@ -31,14 +31,14 @@ namespace CLEO OPCODE_READ_PARAM_INT() OPCODE_READ_PARAM_UINT() OPCODE_READ_PARAM_FLOAT() - OPCODE_READ_PARAM_STRING() // returns char* to internal buffer. It might be overwritten by another string read! - OPCODE_READ_PARAM_STRING_BUFF(_buffer, _bufferSize) // always null terminated - OPCODE_READ_PARAM_FILEPATH() // returns char* to internal buffer. It might be overwritten by another string read! 
+ OPCODE_READ_PARAM_STRING(varName) // reads param and creates const char* variable named 'varName' with pointer to null-terminated string + OPCODE_READ_PARAM_STRING_LEN(varName, maxLength) // same as above, but text length is clamped to maxLength + OPCODE_READ_PARAM_FILEPATH(varName) // reads param and creates const char* variable named 'varName' with pointer to resolved, null-terminated, filepath OPCODE_READ_PARAM_PTR() // read and validate memory address argument - OPCODE_READ_PARAM_OBJECT_HANDLE() - OPCODE_READ_PARAM_PED_HANDLE() - OPCODE_READ_PARAM_VEHICLE_HANDLE() - OPCODE_READ_PARAM_OUTPUT_VAR() // pointer to write result later + OPCODE_READ_PARAM_OBJECT_HANDLE() // read and validate game object handle + OPCODE_READ_PARAM_PED_HANDLE() // read and validate character (ped/actor) handle + OPCODE_READ_PARAM_VEHICLE_HANDLE() // read and validate vehicle handle + OPCODE_READ_PARAM_OUTPUT_VAR() // store variable param pointer to write result later OPCODE_READ_PARAM_OUTPUT_VAR_INT() // pointer to write integer result later OPCODE_READ_PARAM_OUTPUT_VAR_FLOAT() // pointer to write float result later @@ -303,7 +303,7 @@ namespace CLEO return IsVariable(_lastParamType); } - static char* _readParamText(CRunningScript* thread, char* buffer = nullptr, DWORD bufferSize = 0) + static const char* _readParamText(CRunningScript* thread, char* buffer, size_t bufferSize) { _lastParamType = thread->PeekDataType(); _lastParamArrayType = IsArray(_lastParamType) ? thread->PeekArrayDataType() : eArrayDataType::ADT_NONE; @@ -316,7 +316,8 @@ namespace CLEO return nullptr; } - auto str = CLEO_ReadStringOpcodeParam(thread, buffer, bufferSize); + auto str = CLEO_ReadStringPointerOpcodeParam(thread, buffer, bufferSize); // returns pointer to source data whenever possible + if (str == nullptr) // other error? 
{ SHOW_ERROR("Invalid input argument #%d in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), ScriptInfoStr(thread).c_str()); @@ -328,15 +329,6 @@ namespace CLEO return str; } - static char* _readParamFilepath(CRunningScript* thread) - { - auto str = _readParamText(thread); - if (str == nullptr) return nullptr; - - CLEO_ResolvePath(thread, str, MAX_STR_LEN); // uses generic readStringParam's buffer - return str; - } - static bool _writeParamText(CRunningScript* thread, const char* str) { _lastParamType = thread->PeekDataType(); @@ -426,11 +418,11 @@ namespace CLEO #define OPCODE_READ_PARAM_FLOAT() _readParamFloat(thread).fParam; \ if (!IsLegacyScript(thread) && !_paramWasFloat()) { SHOW_ERROR("Input argument #%d expected to be float, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } - #define OPCODE_READ_PARAM_STRING() _readParamText(thread); if(!_paramWasString()) { return OpcodeResult::OR_INTERRUPT; } + #define OPCODE_READ_PARAM_STRING(_varName) char _buff_##_varName[MAX_STR_LEN + 1]; const char* ##_varName = _readParamText(thread, _buff_##_varName, MAX_STR_LEN + 1); if(!_paramWasString()) { return OpcodeResult::OR_INTERRUPT; } - #define OPCODE_READ_PARAM_STRING_BUFF(_buffer, _bufferSize) _readParamText(thread, _buffer, _bufferSize); if(!_paramWasString()) { return OpcodeResult::OR_INTERRUPT; } + #define OPCODE_READ_PARAM_STRING_LEN(_varName, _maxLen) char _buff_##_varName[_maxLen + 1]; const char* ##_varName = _readParamText(thread, _buff_##_varName, _maxLen + 1); if(##_varName != nullptr) ##_varName = _buff_##_varName; if(!_paramWasString()) { return OpcodeResult::OR_INTERRUPT; } - #define OPCODE_READ_PARAM_FILEPATH() _readParamFilepath(thread); if(!_paramWasString()) { return OpcodeResult::OR_INTERRUPT; } + #define OPCODE_READ_PARAM_FILEPATH(_varName) char _buff_##_varName[512]; const char* ##_varName = 
_readParamText(thread, _buff_##_varName, 512); if(_paramWasString()) CLEO_ResolvePath(thread, _buff_##_varName, 512); else return OpcodeResult::OR_INTERRUPT; #define OPCODE_READ_PARAM_PTR() _readParam(thread).pParam; \ if (!_paramWasInt()) { SHOW_ERROR("Input argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 6c33dc61..65d1bc3a 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -399,16 +399,10 @@ namespace CLEO return thread; } - // read string parameter according to convention on strings. Always null terminated - char* ReadStringParam(CRunningScript *thread, char* buf, DWORD bufSize) + const char* ReadStringParam(CRunningScript *thread, char* buff, int buffSize) { - static char internal_buf[MAX_STR_LEN]; - if (!buf) { buf = internal_buf; bufSize = MAX_STR_LEN; } - - if (bufSize > 0) buf[bufSize - 1] = '\0'; // buffer always terminated - if (bufSize <= 1) return buf; // no characters to read, done - - return GetScriptStringParam(thread, 0, buf, bufSize - 1); // do not overwrite buffer terminator + if (buffSize > 0) buff[buffSize - 1] = '\0'; // buffer always terminated + return GetScriptStringParam(thread, 0, buff, buffSize - 1); // minus terminator } // write output\result string parameter @@ -513,7 +507,7 @@ namespace CLEO unsigned int written = 0; const char *iter = format; char* outIter = outputStr; - char bufa[256], fmtbufa[64], *fmta; + char bufa[MAX_STR_LEN + 1], fmtbufa[64], *fmta; CCustomOpcodeSystem::lastErrorMsg.clear(); @@ -605,18 +599,8 @@ namespace CLEO const char* str = ReadStringParam(thread, bufa, sizeof(bufa)); if(str == nullptr) // read error { - if(CCustomOpcodeSystem::lastErrorMsg.find("'null' pointer") != std::string::npos) - { - static const char none[] 
= "(null)"; - str = none; - } - else - { - // lastErrorMsg already set by ReadStringParam - SkipUnusedVarArgs(thread); - outputStr[written] = '\0'; - return -1; // error - } + static const char none[] = "(INVALID_STR)"; + str = none; } while (*str) @@ -756,7 +740,7 @@ namespace CLEO { argumentIsStr[i] = true; - auto str = OPCODE_READ_PARAM_STRING(); + OPCODE_READ_PARAM_STRING(str); stringParams.emplace_front(str); arg->pcParam = stringParams.front().data(); } @@ -853,7 +837,7 @@ namespace CLEO //0A92=-1,create_custom_thread %1d% OpcodeResult __stdcall opcode_0A92(CRunningScript *thread) { - auto path = OPCODE_READ_PARAM_STRING(); + OPCODE_READ_PARAM_STRING(path); auto filename = reinterpret_cast(thread)->ResolvePath(path, DIR_CLEO); // legacy: default search location is game\cleo directory TRACE("[0A92] Starting new custom script %s from thread named %s", filename.c_str(), thread->GetName().c_str()); @@ -893,7 +877,7 @@ namespace CLEO //0A94=-1,create_custom_mission %1d% OpcodeResult __stdcall opcode_0A94(CRunningScript *thread) { - auto path = OPCODE_READ_PARAM_STRING(); + OPCODE_READ_PARAM_STRING(path); auto filename = reinterpret_cast(thread)->ResolvePath(path, DIR_CLEO); // legacy: default search location is game\cleo directory filename += ".cm"; // add custom mission extension @@ -970,37 +954,38 @@ namespace CLEO OpcodeResult __stdcall opcode_0AB1(CRunningScript *thread) { int label = 0; + std::string moduleTxt; - char* moduleTxt = nullptr; - auto paramType = (eDataType)*thread->GetBytePointer(); + auto paramType = thread->PeekDataType(); if (IsImmInteger(paramType) || IsVariable(paramType)) { *thread >> label; // label offset } else if (IsImmString(paramType) || IsVarString(paramType)) { - moduleTxt = ReadStringParam(thread); // string with module and export name + char tmp[MAX_STR_LEN + 1]; + auto str = ReadStringParam(thread, tmp, sizeof(tmp)); // string with module and export name + if (str != nullptr) moduleTxt = str; } else { - SHOW_ERROR("Invalid type 
(%s) of the 'input param count' argument in opcode [0AB1] in script %s \nScript suspended.", ToKindStr(paramType), ((CCustomScript*)thread)->GetInfoStr().c_str()); + SHOW_ERROR("Invalid type of first argument in opcode [0AB1], in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); return thread->Suspend(); } ScmFunction* scmFunc = new ScmFunction(thread); // parse module reference text - if (moduleTxt != nullptr) + if (!moduleTxt.empty()) { - std::string_view str(moduleTxt); - auto pos = str.find('@'); - if (pos == str.npos) + auto pos = moduleTxt.find('@'); + if (pos == moduleTxt.npos) { - SHOW_ERROR("Invalid module reference '%s' in opcode [0AB1] in script %s \nScript suspended.", moduleTxt, ((CCustomScript*)thread)->GetInfoStr().c_str()); + SHOW_ERROR("Invalid module reference '%s' in opcode [0AB1] in script %s \nScript suspended.", moduleTxt.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return thread->Suspend(); } - std::string_view strExport = str.substr(0, pos); - std::string_view strModule = str.substr(pos + 1); + std::string_view strExport = moduleTxt.substr(0, pos); + std::string_view strModule = moduleTxt.substr(pos + 1); // get module's file absolute path auto modulePath = std::string(strModule); @@ -1010,7 +995,7 @@ namespace CLEO auto scriptRef = GetInstance().ModuleSystem.GetExport(modulePath, strExport); if (!scriptRef.Valid()) { - SHOW_ERROR("Not found module '%s' export '%s', requested by opcode [0AB1] in script %s", modulePath.c_str(), &str[0], ((CCustomScript*)thread)->GetInfoStr().c_str()); + SHOW_ERROR("Not found module '%s' export '%s', requested by opcode [0AB1] in script %s", modulePath.c_str(), moduleTxt.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return thread->Suspend(); } scmFunc->moduleExportRef = scriptRef.base; // to be released on return @@ -1023,7 +1008,7 @@ namespace CLEO // "number of input parameters" opcode argument DWORD nParams = 0; - paramType = (eDataType)*thread->GetBytePointer(); + 
paramType = thread->PeekDataType(); if (paramType != DT_END) { if (IsImmInteger(paramType)) @@ -1032,7 +1017,7 @@ namespace CLEO } else { - SHOW_ERROR("Invalid type of first argument in opcode [0AB1], in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); + SHOW_ERROR("Invalid type (%s) of the 'input param count' argument in opcode [0AB1] in script %s \nScript suspended.", ToKindStr(paramType), ((CCustomScript*)thread)->GetInfoStr().c_str()); return thread->Suspend(); } } @@ -1082,7 +1067,9 @@ namespace CLEO } else if (IsImmString(paramType)) // those texts exists in script code, but without terminator character. Copy is necessary { - scmFunc->stringParams.emplace_back(ReadStringParam(thread)); + char tmp[MAX_STR_LEN + 1]; + auto str = ReadStringParam(thread, tmp, sizeof(tmp)); + scmFunc->stringParams.emplace_back(str); arg->pcParam = (char*)scmFunc->stringParams.back().c_str(); } else @@ -1284,7 +1271,7 @@ namespace CLEO //0ABA=1,end_custom_thread_named %1d% OpcodeResult __stdcall opcode_0ABA(CRunningScript *thread) { - auto threadName = OPCODE_READ_PARAM_STRING(); + OPCODE_READ_PARAM_STRING(threadName); auto deleted_thread = (CCustomScript*)GetInstance().ScriptEngine.FindScriptNamed(threadName, false, true, 0); if (deleted_thread) @@ -1333,7 +1320,7 @@ namespace CLEO //0ACA=1,show_text_box %1d% OpcodeResult __stdcall opcode_0ACA(CRunningScript *thread) { - auto text = OPCODE_READ_PARAM_STRING(); + OPCODE_READ_PARAM_STRING(text); PrintHelp(text); return OR_CONTINUE; } @@ -1341,7 +1328,7 @@ namespace CLEO //0ACB=3,show_styled_text %1d% time %2d% style %3d% OpcodeResult __stdcall opcode_0ACB(CRunningScript *thread) { - auto text = OPCODE_READ_PARAM_STRING(); + OPCODE_READ_PARAM_STRING(text); auto time = OPCODE_READ_PARAM_INT(); auto style = OPCODE_READ_PARAM_INT(); @@ -1352,7 +1339,7 @@ namespace CLEO //0ACC=2,show_text_lowpriority %1d% time %2d% OpcodeResult __stdcall opcode_0ACC(CRunningScript *thread) { - auto text = OPCODE_READ_PARAM_STRING(); + 
OPCODE_READ_PARAM_STRING(text); auto time = OPCODE_READ_PARAM_INT(); Print(text, time); @@ -1362,7 +1349,7 @@ namespace CLEO //0ACD=2,show_text_highpriority %1d% time %2d% OpcodeResult __stdcall opcode_0ACD(CRunningScript *thread) { - auto text = OPCODE_READ_PARAM_STRING(); + OPCODE_READ_PARAM_STRING(text); auto time = OPCODE_READ_PARAM_INT(); PrintNow(text, time); @@ -1372,7 +1359,7 @@ namespace CLEO //0ACE=-1,show_formatted_text_box %1d% OpcodeResult __stdcall opcode_0ACE(CRunningScript *thread) { - auto format = OPCODE_READ_PARAM_STRING(); + OPCODE_READ_PARAM_STRING(format); char text[MAX_STR_LEN]; OPCODE_READ_FORMATTED_STRING(thread, text, sizeof(text), format) PrintHelp(text); @@ -1382,7 +1369,7 @@ namespace CLEO //0ACF=-1,show_formatted_styled_text %1d% time %2d% style %3d% OpcodeResult __stdcall opcode_0ACF(CRunningScript *thread) { - auto format = OPCODE_READ_PARAM_STRING(); + OPCODE_READ_PARAM_STRING(format); auto time = OPCODE_READ_PARAM_INT(); auto style = OPCODE_READ_PARAM_INT(); char text[MAX_STR_LEN]; OPCODE_READ_FORMATTED_STRING(thread, text, sizeof(text), format) @@ -1394,7 +1381,7 @@ namespace CLEO //0AD0=-1,show_formatted_text_lowpriority %1d% time %2d% OpcodeResult __stdcall opcode_0AD0(CRunningScript *thread) { - auto format = OPCODE_READ_PARAM_STRING(); + OPCODE_READ_PARAM_STRING(format); auto time = OPCODE_READ_PARAM_INT(); char text[MAX_STR_LEN]; OPCODE_READ_FORMATTED_STRING(thread, text, sizeof(text), format) @@ -1405,7 +1392,7 @@ namespace CLEO //0AD1=-1,show_formatted_text_highpriority %1d% time %2d% OpcodeResult __stdcall opcode_0AD1(CRunningScript *thread) { - auto format = OPCODE_READ_PARAM_STRING(); + OPCODE_READ_PARAM_STRING(format); auto time = OPCODE_READ_PARAM_INT(); char text[MAX_STR_LEN]; OPCODE_READ_FORMATTED_STRING(thread, text, sizeof(text), format) @@ -1440,7 +1427,7 @@ namespace CLEO OpcodeResult __stdcall opcode_0AD3(CRunningScript *thread) { auto resultArg = GetStringParamWriteBuffer(thread); 
OPCODE_VALIDATE_STR_ARG_WRITE(resultArg.data) - auto format = OPCODE_READ_PARAM_STRING(); + OPCODE_READ_PARAM_STRING(format); char text[MAX_STR_LEN]; OPCODE_READ_FORMATTED_STRING(thread, text, sizeof(text), format) WriteStringParam(resultArg, text); @@ -1450,8 +1437,8 @@ namespace CLEO //0AD4=-1,%3d% = scan_string %1d% format %2d% //IF and SET OpcodeResult __stdcall opcode_0AD4(CRunningScript *thread) { - auto src = OPCODE_READ_PARAM_STRING(); - char format[MAX_STR_LEN]; OPCODE_READ_PARAM_STRING_BUFF(format, MAX_STR_LEN); + OPCODE_READ_PARAM_STRING(src); + OPCODE_READ_PARAM_STRING(format); auto resultType = thread->PeekDataType(); if (!IsVariable(resultType) && IsVarString(resultType)) @@ -1509,7 +1496,7 @@ namespace CLEO //0ADC=1, test_cheat %1d% OpcodeResult __stdcall opcode_0ADC(CRunningScript *thread) { - auto text = OPCODE_READ_PARAM_STRING(); + OPCODE_READ_PARAM_STRING(text); SetScriptCondResult(thread, TestCheat(text)); return OR_CONTINUE; } @@ -1535,7 +1522,7 @@ namespace CLEO //0ADE=2,%2d% = text_by_GXT_entry %1d% OpcodeResult __stdcall opcode_0ADE(CRunningScript *thread) { - auto gxt = OPCODE_READ_PARAM_STRING(); + OPCODE_READ_PARAM_STRING_LEN(gxt, 7); // GXT labels can be max 7 character long auto txt = GetInstance().TextManager.Get(gxt); @@ -1553,9 +1540,8 @@ namespace CLEO //0ADF=2,add_dynamic_GXT_entry %1d% text %2d% OpcodeResult __stdcall opcode_0ADF(CRunningScript *thread) { - char gxtBuff[8]; // 7 + terminator character - auto gxt = OPCODE_READ_PARAM_STRING_BUFF(gxtBuff, sizeof(gxtBuff)); - auto txt = OPCODE_READ_PARAM_STRING(); + OPCODE_READ_PARAM_STRING_LEN(gxt, 7); // GXT labels can be max 7 character long + OPCODE_READ_PARAM_STRING(txt); GetInstance().TextManager.AddFxt(gxt, txt); return OR_CONTINUE; @@ -1564,7 +1550,7 @@ namespace CLEO //0AE0=1,remove_dynamic_GXT_entry %1d% OpcodeResult __stdcall opcode_0AE0(CRunningScript *thread) { - auto gxt = OPCODE_READ_PARAM_STRING(); + OPCODE_READ_PARAM_STRING_LEN(gxt, 7); // GXT labels can be max 7 
character long GetInstance().TextManager.RemoveFxt(gxt); return OR_CONTINUE; @@ -1711,7 +1697,7 @@ namespace CLEO { // this opcode is useless now auto val = OPCODE_READ_PARAM_FLOAT(); - auto format = OPCODE_READ_PARAM_STRING(); + OPCODE_READ_PARAM_STRING(format); auto resultArg = GetStringParamWriteBuffer(thread); OPCODE_VALIDATE_STR_ARG_WRITE(resultArg.data) sprintf_s(resultArg.data, resultArg.size, format, val); @@ -1821,19 +1807,30 @@ extern "C" *thread << value; } - LPSTR WINAPI CLEO_ReadStringOpcodeParam(CLEO::CRunningScript* thread, char *buf, int size) + LPCSTR WINAPI CLEO_ReadStringOpcodeParam(CLEO::CRunningScript* thread, char* buff, int buffSize) { - return CLEO_ReadStringPointerOpcodeParam(thread, buf, size); // always support all string param types + static char internal_buff[MAX_STR_LEN + 1]; // and terminator + if (!buff) + { + buff = internal_buff; + buffSize = (buffSize > 0) ? min(buffSize, sizeof(internal_buff)) : sizeof(internal_buff); // allow user's length limit + } + + auto result = ReadStringParam(thread, buff, buffSize); + return (result != nullptr) ? buff : nullptr; } - LPSTR WINAPI CLEO_ReadStringPointerOpcodeParam(CLEO::CRunningScript* thread, char *buf, int size) + LPCSTR WINAPI CLEO_ReadStringPointerOpcodeParam(CLEO::CRunningScript* thread, char* buff, int buffSize) { - auto result = ReadStringParam(thread, buf, size); - - if (result == nullptr) - LOG_WARNING(thread, "%s in script %s", CCustomOpcodeSystem::lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); + static char internal_buff[MAX_STR_LEN + 1]; // and terminator + bool userBuffer = buff != nullptr; + if (!userBuffer) + { + buff = internal_buff; + buffSize = (buffSize > 0) ? 
min(buffSize, sizeof(internal_buff)) : sizeof(internal_buff); // allow user's length limit + } - return result; + return ReadStringParam(thread, buff, buffSize); } void WINAPI CLEO_ReadStringParamWriteBuffer(CLEO::CRunningScript* thread, char** outBuf, int* outBufSize, DWORD* outNeedsTerminator) @@ -1889,6 +1886,21 @@ extern "C" return result; } + SCRIPT_VAR* WINAPI CLEO_PeekPointerToScriptVariable(CLEO::CRunningScript* thread) + { + // store state + auto ip = thread->CurrentIP; + auto count = GetInstance().OpcodeSystem.handledParamCount; + + auto result = GetScriptParamPointer(thread); + + // restore state + thread->CurrentIP = ip; + GetInstance().OpcodeSystem.handledParamCount = count; + + return result; + } + SCRIPT_VAR* WINAPI CLEO_GetOpcodeParamsArray() { return opcodeParams; diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index 705fdb11..5dc984b7 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -67,7 +67,11 @@ namespace CLEO bool needTerminator = false; }; - char* ReadStringParam(CRunningScript* thread, char* buf = nullptr, DWORD bufSize = 0); // null terminated + // Read null-terminated string into the buffer + // returns pointer to string or nullptr on fail + // WARNING: returned pointer may differ from buff and contain string longer than buffSize (ptr to original data source) + const char* ReadStringParam(CRunningScript* thread, char* buff, int buffSize); + StringParamBufferInfo GetStringParamWriteBuffer(CRunningScript* thread); // consumes the param int ReadFormattedString(CRunningScript* thread, char* buf, DWORD bufSize, const char* format); diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index d2f82ff7..0ff02433 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -129,11 +129,13 @@ namespace CLEO return (SCRIPT_VAR*)((size_t)result + pScript->GetBasePointer()); } - char* __fastcall GetScriptStringParam(CRunningScript* thread, int dummy, char* buff, int buffLen) 
+ const char* __fastcall GetScriptStringParam(CRunningScript* thread, int dummy, char* buff, int buffLen) { - if (buff == nullptr || buffLen == 0) return buff; - - if (buffLen < 0) buffLen = 0x7FFFFFFF; // unknown - unlimited + if (buff == nullptr || buffLen < 0) + { + LOG_WARNING(0, "Invalid ReadStringParam input argument! Ptr: 0x%08X, Size: %d", buff, buffLen); + return nullptr; + } auto paramType = thread->PeekDataType(); auto arrayType = IsArray(paramType) ? thread->PeekArrayDataType() : eArrayDataType::ADT_NONE; @@ -153,7 +155,7 @@ namespace CLEO auto len = min((int)strlen(opcodeParams[0].pcParam), buffLen); memcpy(buff, opcodeParams[0].pcParam, len); if (len < buffLen) buff[len] = '\0'; // add terminator if possible - return buff; + return opcodeParams[0].pcParam; // pointer to original data } else if (paramType == DT_VARLEN_STRING) { diff --git a/source/CScriptEngine.h b/source/CScriptEngine.h index 05df03b3..fb73e70b 100644 --- a/source/CScriptEngine.h +++ b/source/CScriptEngine.h @@ -156,8 +156,10 @@ namespace CLEO extern SCRIPT_VAR * (__thiscall * GetScriptParamPointer1)(CRunningScript *); extern SCRIPT_VAR * (__thiscall * GetScriptParamPointer2)(CRunningScript *, int __unused__); - // reimplemented hook of original game's procedure. Null terminator ommited if not enought space in the buffer! - char* __fastcall GetScriptStringParam(CRunningScript* thread, int dummy, char* buff, int buffLen); + // reimplemented hook of original game's procedure + // returns buff or pointer provided by script, nullptr on fail + // WARNING: Null terminator ommited if not enought space in the buffer! 
+ const char* __fastcall GetScriptStringParam(CRunningScript* thread, int dummy, char* buff, int buffLen); inline SCRIPT_VAR * GetScriptParamPointer(CRunningScript *thread) { diff --git a/source/cleo.def b/source/cleo.def index 2c85e6de..dd5e898a 100644 --- a/source/cleo.def +++ b/source/cleo.def @@ -42,8 +42,9 @@ EXPORTS _CLEO_GetParamsHandledCount@0 @39 _CLEO_PeekIntOpcodeParam@4 @40 _CLEO_PeekFloatOpcodeParam@4 @41 - _CLEO_GetScriptByName@16 @42 - _CLEO_GetScriptByFilename@8 @43 - _CLEO_GetScriptFilename@4 @44 - _CLEO_GetScriptWorkDir@4 @45 - _CLEO_SetScriptWorkDir@8 @46 \ No newline at end of file + _CLEO_PeekPointerToScriptVariable@4 @42 + _CLEO_GetScriptByName@16 @43 + _CLEO_GetScriptByFilename@8 @44 + _CLEO_GetScriptFilename@4 @45 + _CLEO_GetScriptWorkDir@4 @46 + _CLEO_SetScriptWorkDir@8 @47 \ No newline at end of file From fb02e5088683a696be413be0e7062c8e127c0e4b Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Fri, 1 Mar 2024 14:55:49 +0100 Subject: [PATCH 105/216] Implemented SBL json info support. (#72) * Implemented SBL json info support. Printing command names in warning and error messages. Added CLEO_RegisterCommand export to SDK. * Updated error messages. 
--- .github/workflows/main.yml | 4 + CHANGELOG.md | 1 + CLEO5.vcxproj | 8 +- CLEO5.vcxproj.filters | 6 + README.md | 1 + cleo_sdk/CLEO.h | 1 + source/CCustomOpcodeSystem.cpp | 32 +- source/CScriptEngine.cpp | 12 +- source/CleoBase.cpp | 2 + source/CleoBase.h | 2 + source/OpcodeInfoDatabase.cpp | 194 +++++++++ source/OpcodeInfoDatabase.h | 31 ++ source/cleo.def | 3 +- third-party/SimpleJSON/README.md | 98 +++++ third-party/SimpleJSON/json.hpp | 650 +++++++++++++++++++++++++++++++ 15 files changed, 1038 insertions(+), 7 deletions(-) create mode 100644 source/OpcodeInfoDatabase.cpp create mode 100644 source/OpcodeInfoDatabase.h create mode 100644 third-party/SimpleJSON/README.md create mode 100644 third-party/SimpleJSON/json.hpp diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1370944b..3eaa4426 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -42,6 +42,7 @@ jobs: run: | @REM create output directory mkdir .output\Release\cleo + mkdir .output\Release\.config mkdir .output\Release\cleo\cleo_modules mkdir .output\Release\cleo\cleo_plugins mkdir .output\Release\cleo\cleo_saves @@ -54,6 +55,9 @@ jobs: copy cleo_plugins\.output\*.ini .output\Release\cleo\cleo_plugins copy cleo_plugins\Audio\bass\bass.dll .output\Release\bass.dll xcopy /E /I tests .output\Release\cleo + + @REM download Sanny Builder Library json + curl https://raw.githubusercontent.com/sannybuilder/library/master/sa/sa.json -o .output\Release\cleo\.config\sa.json @REM install Silent's ASI Loader curl https://silent.rockstarvision.com/uploads/silents_asi_loader_13.zip -o silents_asi_loader_13.zip diff --git a/CHANGELOG.md b/CHANGELOG.md index 30f56264..ba167ef7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -77,6 +77,7 @@ #### SDK AND PLUGINS - now all opcodes in range **0-7FFF** can be registered by plugins - plugins moved to _cleo\cleo_plugins_ directory +- new SDK method: CLEO_RegisterCommand - new SDK method: CLEO_RegisterCallback - new SDK method: 
CLEO_GetVarArgCount - new SDK method: CLEO_PeekIntOpcodeParam diff --git a/CLEO5.vcxproj b/CLEO5.vcxproj index 9a3fbe4b..bfb272e5 100644 --- a/CLEO5.vcxproj +++ b/CLEO5.vcxproj @@ -50,6 +50,7 @@ + @@ -77,6 +78,7 @@ + @@ -126,14 +128,14 @@ $(SolutionDir).output\.obj\$(Configuration)\ CLEO .asi - $(PLUGIN_SDK_DIR)\shared\;$(PLUGIN_SDK_DIR)\shared\game\;$(IncludePath) + $(PLUGIN_SDK_DIR)\shared\;$(PLUGIN_SDK_DIR)\shared\game\;$(SolutionDir)third-party\SimpleJSON;$(IncludePath) $(SolutionDir).output\$(Configuration)\ $(SolutionDir).output\.obj\$(Configuration)\ CLEO .asi - $(PLUGIN_SDK_DIR)\shared\;$(PLUGIN_SDK_DIR)\shared\game\;$(IncludePath) + $(PLUGIN_SDK_DIR)\shared\;$(PLUGIN_SDK_DIR)\shared\game\;$(SolutionDir)third-party\SimpleJSON;$(IncludePath) $(GTA_SA_DIR)\gta_sa.exe @@ -149,7 +151,7 @@ true true MultiThreaded - $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared\;$(PLUGIN_SDK_DIR)\shared\game\;%(AdditionalIncludeDirectories) + $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;%(AdditionalIncludeDirectories) _NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;%(PreprocessorDefinitions);TARGET_NAME=R"($(TargetName))" /Zc:threadSafeInit- %(AdditionalOptions) Create diff --git a/CLEO5.vcxproj.filters b/CLEO5.vcxproj.filters index 841af70d..a6b4f839 100644 --- a/CLEO5.vcxproj.filters +++ b/CLEO5.vcxproj.filters @@ -102,6 +102,9 @@ plugin_sdk + + source\utils + @@ -167,6 +170,9 @@ cleo_sdk + + source\utils + diff --git a/README.md b/README.md index cae11b92..58018a3a 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,7 @@ CLEO requires an 'ASI Loader' installed to run which is provided with the releas No additional files are replaced, however the following files and folders are added: - cleo\ (CLEO script directory) +- cleo\.config\sa.json (opcodes info file) - cleo\cleo_plugins\SA.Audio.cleo (audio playback utilities powered by BASS.dll library) - 
cleo\cleo_plugins\SA.DebugUtils.cleo (script debugging utilities plugin) - cleo\cleo_plugins\SA.FileSystemOperations.cleo (disk drive files related operations plugin) diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index d2b27f09..b51c4e37 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -462,6 +462,7 @@ DWORD WINAPI CLEO_GetVersion(); eGameVersion WINAPI CLEO_GetGameVersion(); BOOL WINAPI CLEO_RegisterOpcode(WORD opcode, _pOpcodeHandler callback); +BOOL WINAPI CLEO_RegisterCommand(const char* commandName, _pOpcodeHandler callback); // uses cleo\.CONFIG\sa.json to obtain opcode number from name void WINAPI CLEO_RegisterCallback(eCallbackId id, void* func); diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 65d1bc3a..aa4414d1 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -169,7 +169,15 @@ namespace CLEO if (opcode > LastOriginalOpcode) { - SHOW_ERROR("Opcode [%04X] not registered! \nCalled in script %s\nPreviously called opcode: [%04X]\nScript suspended.", opcode, ((CCustomScript*)thread)->GetInfoStr().c_str(), prevOpcode); + auto extensionMsg = GetInstance().OpcodeInfoDb.GetExtensionMissingMessage(opcode); + if (!extensionMsg.empty()) extensionMsg = " " + extensionMsg; + + SHOW_ERROR("Custom opcode [%04X] not registered!%s\nCalled in script %s\nPreviously called opcode: [%04X]\nScript suspended.", + opcode, + extensionMsg.c_str(), + ((CCustomScript*)thread)->GetInfoStr().c_str(), + prevOpcode); + return thread->Suspend(); } @@ -178,7 +186,15 @@ namespace CLEO if(result == OR_ERROR) { - SHOW_ERROR("Opcode [%04X] not found! 
\nCalled in script %s\nScript suspended.", opcode, ((CCustomScript*)thread)->GetInfoStr().c_str()); + auto extensionMsg = GetInstance().OpcodeInfoDb.GetExtensionMissingMessage(opcode); + if (!extensionMsg.empty()) extensionMsg = " " + extensionMsg; + + SHOW_ERROR("Opcode [%04X] not found!%s\nCalled in script %s\nPreviously called opcode: [%04X]\nScript suspended.", + opcode, + extensionMsg.c_str(), + ((CCustomScript*)thread)->GetInfoStr().c_str(), + prevOpcode); + return thread->Suspend(); } } @@ -1779,6 +1795,18 @@ extern "C" return CCustomOpcodeSystem::RegisterOpcode(opcode, callback); } + BOOL WINAPI CLEO_RegisterCommand(const char* commandName, CustomOpcodeHandler callback) + { + WORD opcode = GetInstance().OpcodeInfoDb.GetOpcode(commandName); + if (opcode == 0xFFFF) + { + LOG_WARNING(0, "Failed to register opcode [%s]! Command name not found in the database.", commandName); + return false; + } + + return CCustomOpcodeSystem::RegisterOpcode(opcode, callback); + } + #ifdef _MSC_VER #pragma warning(pop) #endif diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 0ff02433..1e309daa 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -779,7 +779,17 @@ namespace CLEO ss << "offset {" << address << "}"; // Sanny offsets style ss << " - "; - ss << std::hex << std::uppercase << std::setw(4) << std::setfill('0') << CCustomOpcodeSystem::lastOpcode << ": ..."; + ss << std::hex << std::uppercase << std::setw(4) << std::setfill('0') << CCustomOpcodeSystem::lastOpcode; + + auto commandName = GetInstance().OpcodeInfoDb.GetCommandName(CCustomOpcodeSystem::lastOpcode); + if (commandName != nullptr) + { + ss << ": " << commandName; + } + else + { + ss << ": ..."; + } } } diff --git a/source/CleoBase.cpp b/source/CleoBase.cpp index 9a010cf0..961d13c9 100644 --- a/source/CleoBase.cpp +++ b/source/CleoBase.cpp @@ -121,6 +121,8 @@ namespace CLEO FS::create_directory(FS::path(Filepath_Root).append("cleo\\cleo_plugins")); 
FS::create_directory(FS::path(Filepath_Root).append("cleo\\cleo_saves")); + OpcodeInfoDb.Load(FS::path(Filepath_Root).append("cleo\\.config\\sa.json").generic_string().c_str()); + CodeInjector.OpenReadWriteAccess(); // must do this earlier to ensure plugins write access on init GameMenu.Inject(CodeInjector); DmaFix.Inject(CodeInjector); diff --git a/source/CleoBase.h b/source/CleoBase.h index 67dc418a..e6f1b9d2 100644 --- a/source/CleoBase.h +++ b/source/CleoBase.h @@ -12,6 +12,7 @@ #include "CTextManager.h" #include "FileEnumerator.h" #include "crc32.h" +#include "OpcodeInfoDatabase.h" namespace CLEO { @@ -31,6 +32,7 @@ namespace CLEO CCustomOpcodeSystem OpcodeSystem; CModuleSystem ModuleSystem; CPluginSystem PluginSystem; + OpcodeInfoDatabase OpcodeInfoDb; int saveSlot = -1; // -1 if not loaded from save diff --git a/source/OpcodeInfoDatabase.cpp b/source/OpcodeInfoDatabase.cpp new file mode 100644 index 00000000..5e96bfbc --- /dev/null +++ b/source/OpcodeInfoDatabase.cpp @@ -0,0 +1,194 @@ +#include "stdafx.h" +#include "OpcodeInfoDatabase.h" +#include "json.hpp" +#include + +using namespace std; +using namespace json; + +void OpcodeInfoDatabase::Clear() +{ + ok = false; + extensions.clear(); +} + +bool OpcodeInfoDatabase::Load(const char* filepath) +{ + Clear(); + + ifstream file(filepath); + if (file.fail()) + { + TRACE("Failed to open opcodes database '%s' file.", filepath); + return false; + } + + file.seekg(0, ifstream::end); + auto size = file.tellg(); + file.seekg(0, ifstream::beg); + + if (size > 8 * 1024 * 1024) // 8MB is reasonable json file size upper limit + { + TRACE("Opcodes database '%s' file too large to load.", filepath); + return false; + } + + std::string text; + text.resize((size_t)size); + file.read(text.data(), size); + file.close(); + + if (file.fail()) + { + TRACE("Error while reading opcodes database '%s' file.", filepath); + return false; + } + + JSON root; + try + { + root = JSON::Load(text.c_str()); + } + catch (const std::exception& 
ex) + { + TRACE("Error while parsing opcodes database '%s' file:\n%s", filepath, ex.what()); + return false; + } + + if (root.IsNull() || root["extensions"].JSONType() != JSON::Class::Array) + { + TRACE("Invalid opcodes database '%s' file.", filepath); + return false; + } + + for (auto& e : root["extensions"].ArrayRange()) + { + auto name = e["name"]; + auto commands = e["commands"]; + if (name.JSONType() != JSON::Class::String || commands.JSONType() != JSON::Class::Array) + { + continue; // invalid extension + } + + Extension extension; + extension.name = name.ToString(); + + for (auto& c : commands.ArrayRange()) + { + auto commandId = c["id"]; + auto commandName = c["name"]; + if (commandId.JSONType() != JSON::Class::String || commandName.JSONType() != JSON::Class::String) + { + continue; // invalid command + } + + auto id = std::stoul(commandId.ToString(), nullptr, 16); + if (id > 0x7FFF) + { + continue; // opcode out of bounds + } + + extension.opcodes[(uint16_t)id] = commandName.ToString(); + } + + if (!extension.opcodes.empty()) + { + extensions[extension.name] = std::move(extension); + } + } + + ok = true; + return true; +} + +const char* OpcodeInfoDatabase::GetExtensionName(uint16_t opcode) const +{ + if (ok) + { + for (auto& entry : extensions) + { + auto& extension = entry.second; + auto& opcodes = extension.opcodes; + + if (opcodes.find(opcode) != opcodes.end()) + { + return extension.name.c_str(); + } + } + } + + return nullptr; +} + +const char* OpcodeInfoDatabase::GetExtensionName(const char* commandName) const +{ + if (ok) + { + for (auto& entry : extensions) + { + auto& extension = entry.second; + auto& opcodes = extension.opcodes; + + for (auto& opcode : opcodes) + { + if (_strcmpi(commandName, opcode.second.c_str()) == 0) + { + return extension.name.c_str(); + } + } + } + } + + return nullptr; +} + +uint16_t OpcodeInfoDatabase::GetOpcode(const char* commandName) const +{ + if (ok) + { + for (auto& entry : extensions) + { + auto& extension = 
entry.second; + auto& opcodes = extension.opcodes; + + for (auto& opcode : opcodes) + { + if (_strcmpi(commandName, opcode.second.c_str()) == 0) + { + return opcode.first; + } + } + } + } + + return 0xFFFF; +} + +const char* OpcodeInfoDatabase::GetCommandName(uint16_t opcode) const +{ + if (ok) + { + for (auto& entry : extensions) + { + auto& opcodes = entry.second.opcodes; + + if (opcodes.find(opcode) != opcodes.end()) + { + return opcodes.at(opcode).c_str(); + } + } + } + + return nullptr; +} + +std::string OpcodeInfoDatabase::GetExtensionMissingMessage(uint16_t opcode) const +{ + auto extensionName = GetExtensionName(opcode); + if (extensionName == nullptr) + { + return {}; + } + + return CLEO::StringPrintf("CLEO extension plugin \"%s\" is missing!", extensionName); +} diff --git a/source/OpcodeInfoDatabase.h b/source/OpcodeInfoDatabase.h new file mode 100644 index 00000000..c849f9bb --- /dev/null +++ b/source/OpcodeInfoDatabase.h @@ -0,0 +1,31 @@ +#pragma once +#include +#include + + +class OpcodeInfoDatabase +{ + struct Extension + { + std::string name; + std::map opcodes; + }; + + bool ok = false; + std::map extensions; + +public: + OpcodeInfoDatabase() = default; + + void Clear(); + bool Load(const char* filepath); + + const char* GetExtensionName(uint16_t opcode) const; // nullptr if not found + const char* GetExtensionName(const char* commandName) const; // nullptr if not found + + uint16_t GetOpcode(const char* commandName) const; // 0xFFFF if not found + const char* GetCommandName(uint16_t opcode) const; // nullptr if not found + + std::string GetExtensionMissingMessage(uint16_t opcode) const; // extension "x" missing message if known, empty text otherwise +}; + diff --git a/source/cleo.def b/source/cleo.def index dd5e898a..9e018aa6 100644 --- a/source/cleo.def +++ b/source/cleo.def @@ -47,4 +47,5 @@ EXPORTS _CLEO_GetScriptByFilename@8 @44 _CLEO_GetScriptFilename@4 @45 _CLEO_GetScriptWorkDir@4 @46 - _CLEO_SetScriptWorkDir@8 @47 \ No newline at end of 
file + _CLEO_SetScriptWorkDir@8 @47 + _CLEO_RegisterCommand@8 @48 diff --git a/third-party/SimpleJSON/README.md b/third-party/SimpleJSON/README.md new file mode 100644 index 00000000..26f347d4 --- /dev/null +++ b/third-party/SimpleJSON/README.md @@ -0,0 +1,98 @@ +# SimpleJSON +Simple C++ JSON library + +# Source +https://github.com/nbsdx/SimpleJSON + +## License +Do what the fuck you want public license + +## About +SimpleJSON is a lightweight JSON library for exporting data in JSON format from C++. By taking advantage of templates and operator overloading on the backend, you're able to create and work with JSON objects right away, just as you would expect from a language such as JavaScript. SimpleJSON is a single C++ Header file, "json.hpp". Feel free to download this file on its own, and include it in your project. No other requirements! + +#### Platforms +SimpleJSON should work on any platform; it's only requirement is a C++11 compatible compiler, as it make heavy use of the C++11 move semantics, and variadic templates. The tests are tailored for linux, but could be ported to any platform with python support and a C++11 compiler. + +## API +You can find the API [over here](API.md). For now it's just a Markdown file with C++ syntax highlighting, but it's better than nothing! + +## Upcoming Features +SimpleJSON is still missing some features, which I hope to get done soon! +* Write more test cases to cover all major components( mostly parsing ) + +One of the biggests goals for SimpleJSON is for it to be lightweight, and small. Having complicated logic isn't bad, but it bloats the codebase in most cases. I'd like to keep things small rather than put in big features that take a ton of space. + +If you run into any bugs, or see that I'm missing a featuer, please submit an issue through GitHub and I'll respond as soon as I can! + +## Example +More examples can be found in the 'examples' directory. Check out [the API](API.md) for a full list of functions. 
+ +```cpp +#include "json.hpp" + +int main() { + json::JSON obj; + // Create a new Array as a field of an Object. + obj["array"] = json::Array( true, "Two", 3, 4.0 ); + // Create a new Object as a field of another Object. + obj["obj"] = json::Object(); + // Assign to one of the inner object's fields + obj["obj"]["inner"] = "Inside"; + + // We don't need to specify the type of the JSON object: + obj["new"]["some"]["deep"]["key"] = "Value"; + obj["array2"].append( false, "three" ); + + // We can also parse a string into a JSON object: + obj["parsed"] = JSON::Load( "[ { \"Key\" : \"Value\" }, false ]" ); + + std::cout << obj << std::endl; +} +``` +Output: +``` +{ + "array" : [true, "Two", 3, 4.000000], + "array2" : [false, "three"], + "new" : { + "some" : { + "deep" : { + "key" : "Value" + } + } + }, + "obj" : { + "inner" : "Inside" + }, + "parsed" : [{ + "Key" : "Value" + }, false] +} +``` + +This example can also be written another way: +```cpp +#include "json.hpp" +#include + +using json::JSON; + +int main() { + JSON obj = { + "array", json::Array( true, "Two", 3, 4.0 ), + "obj", { + "inner", "Inside" + }, + "new", { + "some", { + "deep", { + "key", "Value" + } + } + }, + "array2", json::Array( false, "three" ) + }; + + std::cout << obj << std::endl; +``` +Sadly, we don't have access to the : character in C++, so we can't use that to seperate key-value pairs, but by using commas, we can achieve a very similar effect. The other point you might notice, is that we have to explictly create arrays. This is a limitation of C++'s operator overloading rules, so we can't use the [] operator to define the array :( I'm looking into ways to make this smoother. diff --git a/third-party/SimpleJSON/json.hpp b/third-party/SimpleJSON/json.hpp new file mode 100644 index 00000000..341a292d --- /dev/null +++ b/third-party/SimpleJSON/json.hpp @@ -0,0 +1,650 @@ +// Miran: I edited it to make numbers type "long long" instead of just "long", as sa.json won't load otherwise. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace json { + +using std::map; +using std::deque; +using std::string; +using std::enable_if; +using std::initializer_list; +using std::is_same; +using std::is_convertible; +using std::is_integral; +using std::is_floating_point; + +namespace { + string json_escape( const string &str ) { + string output; + for( unsigned i = 0; i < str.length(); ++i ) + switch( str[i] ) { + case '\"': output += "\\\""; break; + case '\\': output += "\\\\"; break; + case '\b': output += "\\b"; break; + case '\f': output += "\\f"; break; + case '\n': output += "\\n"; break; + case '\r': output += "\\r"; break; + case '\t': output += "\\t"; break; + default : output += str[i]; break; + } + return std::move( output ); + } +} + +class JSON +{ + union BackingData { + BackingData( double d ) : Float( d ){} + BackingData( long long l ) : Int( l ){} + BackingData( bool b ) : Bool( b ){} + BackingData( string s ) : String( new string( s ) ){} + BackingData() : Int( 0 ){} + + deque *List; + map *Map; + string *String; + double Float; + long long Int; + bool Bool; + } Internal; + + public: + enum class Class { + Null, + Object, + Array, + String, + Floating, + Integral, + Boolean + }; + + template + class JSONWrapper { + Container *object; + + public: + JSONWrapper( Container *val ) : object( val ) {} + JSONWrapper( std::nullptr_t ) : object( nullptr ) {} + + typename Container::iterator begin() { return object ? object->begin() : typename Container::iterator(); } + typename Container::iterator end() { return object ? object->end() : typename Container::iterator(); } + typename Container::const_iterator begin() const { return object ? object->begin() : typename Container::iterator(); } + typename Container::const_iterator end() const { return object ? 
object->end() : typename Container::iterator(); } + }; + + template + class JSONConstWrapper { + const Container *object; + + public: + JSONConstWrapper( const Container *val ) : object( val ) {} + JSONConstWrapper( std::nullptr_t ) : object( nullptr ) {} + + typename Container::const_iterator begin() const { return object ? object->begin() : typename Container::const_iterator(); } + typename Container::const_iterator end() const { return object ? object->end() : typename Container::const_iterator(); } + }; + + JSON() : Internal(), Type( Class::Null ){} + + JSON( initializer_list list ) + : JSON() + { + SetType( Class::Object ); + for( auto i = list.begin(), e = list.end(); i != e; ++i, ++i ) + operator[]( i->ToString() ) = *std::next( i ); + } + + JSON( JSON&& other ) + : Internal( other.Internal ) + , Type( other.Type ) + { other.Type = Class::Null; other.Internal.Map = nullptr; } + + JSON& operator=( JSON&& other ) { + ClearInternal(); + Internal = other.Internal; + Type = other.Type; + other.Internal.Map = nullptr; + other.Type = Class::Null; + return *this; + } + + JSON( const JSON &other ) { + switch( other.Type ) { + case Class::Object: + Internal.Map = + new map( other.Internal.Map->begin(), + other.Internal.Map->end() ); + break; + case Class::Array: + Internal.List = + new deque( other.Internal.List->begin(), + other.Internal.List->end() ); + break; + case Class::String: + Internal.String = + new string( *other.Internal.String ); + break; + default: + Internal = other.Internal; + } + Type = other.Type; + } + + JSON& operator=( const JSON &other ) { + ClearInternal(); + switch( other.Type ) { + case Class::Object: + Internal.Map = + new map( other.Internal.Map->begin(), + other.Internal.Map->end() ); + break; + case Class::Array: + Internal.List = + new deque( other.Internal.List->begin(), + other.Internal.List->end() ); + break; + case Class::String: + Internal.String = + new string( *other.Internal.String ); + break; + default: + Internal = 
other.Internal; + } + Type = other.Type; + return *this; + } + + ~JSON() { + switch( Type ) { + case Class::Array: + delete Internal.List; + break; + case Class::Object: + delete Internal.Map; + break; + case Class::String: + delete Internal.String; + break; + default:; + } + } + + template + JSON( T b, typename enable_if::value>::type* = 0 ) : Internal( b ), Type( Class::Boolean ){} + + template + JSON( T i, typename enable_if::value && !is_same::value>::type* = 0 ) : Internal( (long long)i ), Type( Class::Integral ){} + + template + JSON( T f, typename enable_if::value>::type* = 0 ) : Internal( (double)f ), Type( Class::Floating ){} + + template + JSON( T s, typename enable_if::value>::type* = 0 ) : Internal( string( s ) ), Type( Class::String ){} + + JSON( std::nullptr_t ) : Internal(), Type( Class::Null ){} + + static JSON Make( Class type ) { + JSON ret; ret.SetType( type ); + return ret; + } + + static JSON Load( const string & ); + + template + void append( T arg ) { + SetType( Class::Array ); Internal.List->emplace_back( arg ); + } + + template + void append( T arg, U... args ) { + append( arg ); append( args... 
); + } + + template + typename enable_if::value, JSON&>::type operator=( T b ) { + SetType( Class::Boolean ); Internal.Bool = b; return *this; + } + + template + typename enable_if::value && !is_same::value, JSON&>::type operator=( T i ) { + SetType( Class::Integral ); Internal.Int = i; return *this; + } + + template + typename enable_if::value, JSON&>::type operator=( T f ) { + SetType( Class::Floating ); Internal.Float = f; return *this; + } + + template + typename enable_if::value, JSON&>::type operator=( T s ) { + SetType( Class::String ); *Internal.String = string( s ); return *this; + } + + JSON& operator[]( const string &key ) { + SetType( Class::Object ); return Internal.Map->operator[]( key ); + } + + JSON& operator[]( unsigned index ) { + SetType( Class::Array ); + if( index >= Internal.List->size() ) Internal.List->resize( index + 1 ); + return Internal.List->operator[]( index ); + } + + JSON &at( const string &key ) { + return operator[]( key ); + } + + const JSON &at( const string &key ) const { + return Internal.Map->at( key ); + } + + JSON &at( unsigned index ) { + return operator[]( index ); + } + + const JSON &at( unsigned index ) const { + return Internal.List->at( index ); + } + + int length() const { + if( Type == Class::Array ) + return Internal.List->size(); + else + return -1; + } + + bool hasKey( const string &key ) const { + if( Type == Class::Object ) + return Internal.Map->find( key ) != Internal.Map->end(); + return false; + } + + int size() const { + if( Type == Class::Object ) + return Internal.Map->size(); + else if( Type == Class::Array ) + return Internal.List->size(); + else + return -1; + } + + Class JSONType() const { return Type; } + + /// Functions for getting primitives from the JSON object. + bool IsNull() const { return Type == Class::Null; } + + string ToString() const { bool b; return std::move( ToString( b ) ); } + string ToString( bool &ok ) const { + ok = (Type == Class::String); + return ok ? 
std::move( json_escape( *Internal.String ) ): string(""); + } + + double ToFloat() const { bool b; return ToFloat( b ); } + double ToFloat( bool &ok ) const { + ok = (Type == Class::Floating); + return ok ? Internal.Float : 0.0; + } + + long long ToInt() const { bool b; return ToInt( b ); } + long long ToInt( bool &ok ) const { + ok = (Type == Class::Integral); + return ok ? Internal.Int : 0; + } + + bool ToBool() const { bool b; return ToBool( b ); } + bool ToBool( bool &ok ) const { + ok = (Type == Class::Boolean); + return ok ? Internal.Bool : false; + } + + JSONWrapper> ObjectRange() { + if( Type == Class::Object ) + return JSONWrapper>( Internal.Map ); + return JSONWrapper>( nullptr ); + } + + JSONWrapper> ArrayRange() { + if( Type == Class::Array ) + return JSONWrapper>( Internal.List ); + return JSONWrapper>( nullptr ); + } + + JSONConstWrapper> ObjectRange() const { + if( Type == Class::Object ) + return JSONConstWrapper>( Internal.Map ); + return JSONConstWrapper>( nullptr ); + } + + + JSONConstWrapper> ArrayRange() const { + if( Type == Class::Array ) + return JSONConstWrapper>( Internal.List ); + return JSONConstWrapper>( nullptr ); + } + + string dump( int depth = 1, string tab = " ") const { + string pad = ""; + for( int i = 0; i < depth; ++i, pad += tab ); + + switch( Type ) { + case Class::Null: + return "null"; + case Class::Object: { + string s = "{\n"; + bool skip = true; + for( auto &p : *Internal.Map ) { + if( !skip ) s += ",\n"; + s += ( pad + "\"" + p.first + "\" : " + p.second.dump( depth + 1, tab ) ); + skip = false; + } + s += ( "\n" + pad.erase( 0, 2 ) + "}" ) ; + return s; + } + case Class::Array: { + string s = "["; + bool skip = true; + for( auto &p : *Internal.List ) { + if( !skip ) s += ", "; + s += p.dump( depth + 1, tab ); + skip = false; + } + s += "]"; + return s; + } + case Class::String: + return "\"" + json_escape( *Internal.String ) + "\""; + case Class::Floating: + return std::to_string( Internal.Float ); + case 
Class::Integral: + return std::to_string( Internal.Int ); + case Class::Boolean: + return Internal.Bool ? "true" : "false"; + default: + return ""; + } + return ""; + } + + friend std::ostream& operator<<( std::ostream&, const JSON & ); + + private: + void SetType( Class type ) { + if( type == Type ) + return; + + ClearInternal(); + + switch( type ) { + case Class::Null: Internal.Map = nullptr; break; + case Class::Object: Internal.Map = new map(); break; + case Class::Array: Internal.List = new deque(); break; + case Class::String: Internal.String = new string(); break; + case Class::Floating: Internal.Float = 0.0; break; + case Class::Integral: Internal.Int = 0; break; + case Class::Boolean: Internal.Bool = false; break; + } + + Type = type; + } + + private: + /* beware: only call if YOU know that Internal is allocated. No checks performed here. + This function should be called in a constructed JSON just before you are going to + overwrite Internal... + */ + void ClearInternal() { + switch( Type ) { + case Class::Object: delete Internal.Map; break; + case Class::Array: delete Internal.List; break; + case Class::String: delete Internal.String; break; + default:; + } + } + + private: + + Class Type = Class::Null; +}; + +JSON Array() { + return std::move( JSON::Make( JSON::Class::Array ) ); +} + +template +JSON Array( T... args ) { + JSON arr = JSON::Make( JSON::Class::Array ); + arr.append( args... 
); + return std::move( arr ); +} + +JSON Object() { + return std::move( JSON::Make( JSON::Class::Object ) ); +} + +std::ostream& operator<<( std::ostream &os, const JSON &json ) { + os << json.dump(); + return os; +} + +namespace { + JSON parse_next( const string &, size_t & ); + + void consume_ws( const string &str, size_t &offset ) { + while( isspace( str[offset] ) ) ++offset; + } + + JSON parse_object( const string &str, size_t &offset ) { + JSON Object = JSON::Make( JSON::Class::Object ); + + ++offset; + consume_ws( str, offset ); + if( str[offset] == '}' ) { + ++offset; return std::move( Object ); + } + + while( true ) { + JSON Key = parse_next( str, offset ); + consume_ws( str, offset ); + if( str[offset] != ':' ) { + std::cerr << "Error: Object: Expected colon, found '" << str[offset] << "'\n"; + break; + } + consume_ws( str, ++offset ); + JSON Value = parse_next( str, offset ); + Object[Key.ToString()] = Value; + + consume_ws( str, offset ); + if( str[offset] == ',' ) { + ++offset; continue; + } + else if( str[offset] == '}' ) { + ++offset; break; + } + else { + std::cerr << "ERROR: Object: Expected comma, found '" << str[offset] << "'\n"; + break; + } + } + + return std::move( Object ); + } + + JSON parse_array( const string &str, size_t &offset ) { + JSON Array = JSON::Make( JSON::Class::Array ); + unsigned index = 0; + + ++offset; + consume_ws( str, offset ); + if( str[offset] == ']' ) { + ++offset; return std::move( Array ); + } + + while( true ) { + Array[index++] = parse_next( str, offset ); + consume_ws( str, offset ); + + if( str[offset] == ',' ) { + ++offset; continue; + } + else if( str[offset] == ']' ) { + ++offset; break; + } + else { + std::cerr << "ERROR: Array: Expected ',' or ']', found '" << str[offset] << "'\n"; + return std::move( JSON::Make( JSON::Class::Array ) ); + } + } + + return std::move( Array ); + } + + JSON parse_string( const string &str, size_t &offset ) { + JSON String; + string val; + for( char c = str[++offset]; c != '\"' ; 
c = str[++offset] ) { + if( c == '\\' ) { + switch( str[ ++offset ] ) { + case '\"': val += '\"'; break; + case '\\': val += '\\'; break; + case '/' : val += '/' ; break; + case 'b' : val += '\b'; break; + case 'f' : val += '\f'; break; + case 'n' : val += '\n'; break; + case 'r' : val += '\r'; break; + case 't' : val += '\t'; break; + case 'u' : { + val += "\\u" ; + for( unsigned i = 1; i <= 4; ++i ) { + c = str[offset+i]; + if( (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') ) + val += c; + else { + std::cerr << "ERROR: String: Expected hex character in unicode escape, found '" << c << "'\n"; + return std::move( JSON::Make( JSON::Class::String ) ); + } + } + offset += 4; + } break; + default : val += '\\'; break; + } + } + else + val += c; + } + ++offset; + String = val; + return std::move( String ); + } + + JSON parse_number( const string &str, size_t &offset ) { + JSON Number; + string val, exp_str; + char c; + bool isDouble = false; + long exp = 0; + while( true ) { + c = str[offset++]; + if( (c == '-') || (c >= '0' && c <= '9') ) + val += c; + else if( c == '.' 
) { + val += c; + isDouble = true; + } + else + break; + } + if( c == 'E' || c == 'e' ) { + c = str[ offset++ ]; + if( c == '-' ){ ++offset; exp_str += '-';} + while( true ) { + c = str[ offset++ ]; + if( c >= '0' && c <= '9' ) + exp_str += c; + else if( !isspace( c ) && c != ',' && c != ']' && c != '}' ) { + std::cerr << "ERROR: Number: Expected a number for exponent, found '" << c << "'\n"; + return std::move( JSON::Make( JSON::Class::Null ) ); + } + else + break; + } + exp = std::stol( exp_str ); + } + else if( !isspace( c ) && c != ',' && c != ']' && c != '}' ) { + std::cerr << "ERROR: Number: unexpected character '" << c << "'\n"; + return std::move( JSON::Make( JSON::Class::Null ) ); + } + --offset; + + if( isDouble ) + Number = std::stod( val ) * std::pow( 10, exp ); + else { + if( !exp_str.empty() ) + Number = std::stoll( val ) * std::pow( 10, exp ); + else + Number = std::stoll( val ); + } + return std::move( Number ); + } + + JSON parse_bool( const string &str, size_t &offset ) { + JSON Bool; + if( str.substr( offset, 4 ) == "true" ) + Bool = true; + else if( str.substr( offset, 5 ) == "false" ) + Bool = false; + else { + std::cerr << "ERROR: Bool: Expected 'true' or 'false', found '" << str.substr( offset, 5 ) << "'\n"; + return std::move( JSON::Make( JSON::Class::Null ) ); + } + offset += (Bool.ToBool() ? 
4 : 5); + return std::move( Bool ); + } + + JSON parse_null( const string &str, size_t &offset ) { + JSON Null; + if( str.substr( offset, 4 ) != "null" ) { + std::cerr << "ERROR: Null: Expected 'null', found '" << str.substr( offset, 4 ) << "'\n"; + return std::move( JSON::Make( JSON::Class::Null ) ); + } + offset += 4; + return std::move( Null ); + } + + JSON parse_next( const string &str, size_t &offset ) { + char value; + consume_ws( str, offset ); + value = str[offset]; + switch( value ) { + case '[' : return std::move( parse_array( str, offset ) ); + case '{' : return std::move( parse_object( str, offset ) ); + case '\"': return std::move( parse_string( str, offset ) ); + case 't' : + case 'f' : return std::move( parse_bool( str, offset ) ); + case 'n' : return std::move( parse_null( str, offset ) ); + default : if( ( value <= '9' && value >= '0' ) || value == '-' ) + return std::move( parse_number( str, offset ) ); + } + std::cerr << "ERROR: Parse: Unknown starting character '" << value << "'\n"; + return JSON(); + } +} + +JSON JSON::Load( const string &str ) { + size_t offset = 0; + return std::move( parse_next( str, offset ) ); +} + +} // End Namespace json From 8386bb12d5951ff4b49a8b7256e79260d447ea5c Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Fri, 1 Mar 2024 17:02:03 +0100 Subject: [PATCH 106/216] 0AE2, 0AE3 opcodes fix (#83) --- source/CCustomOpcodeSystem.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index aa4414d1..c0e9b9f8 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -1655,6 +1655,7 @@ namespace CLEO auto found = pool.GetRef(obj); OPCODE_WRITE_PARAM_INT(found); OPCODE_CONDITION_RESULT(true); + return OR_CONTINUE; } } } @@ -1698,6 +1699,7 @@ namespace CLEO auto found = pool.GetRef(obj); OPCODE_WRITE_PARAM_INT(found); OPCODE_CONDITION_RESULT(true); + return OR_CONTINUE; } } } From aace42938514a515f951fd9d3e53374c0b6963c3 Mon Sep 17 00:00:00 
2001 From: MiranDMC Date: Fri, 1 Mar 2024 17:30:26 +0100 Subject: [PATCH 107/216] 0AA6 pointer validation relaxed for legacy scripts (#82) * 0AA6 pointer validation relaxed for legacy scripts * fixup! 0AA6 pointer validation relaxed for legacy scripts --- cleo_plugins/MemoryOperations/MemoryOperations.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/cleo_plugins/MemoryOperations/MemoryOperations.cpp b/cleo_plugins/MemoryOperations/MemoryOperations.cpp index 02a9160f..b43179ac 100644 --- a/cleo_plugins/MemoryOperations/MemoryOperations.cpp +++ b/cleo_plugins/MemoryOperations/MemoryOperations.cpp @@ -419,7 +419,17 @@ class MemoryOperations static OpcodeResult __stdcall opcode_0AA6(CLEO::CRunningScript* thread) { auto func = OPCODE_READ_PARAM_PTR(); - auto obj = OPCODE_READ_PARAM_PTR(); + + void* obj = nullptr; + if (!IsLegacyScript(thread)) + { + obj = OPCODE_READ_PARAM_PTR(); + } + else + { + obj = (void*)OPCODE_READ_PARAM_INT(); // at least one mod used 0AA6 with 0 as struct argument (effectively turning it into 0AA5 opcode...) 
+ } + auto numArgs = OPCODE_READ_PARAM_INT(); auto numPop = OPCODE_READ_PARAM_INT(); From 77e52a22fb169d8d58d567f0a83fb9d46af6b927 Mon Sep 17 00:00:00 2001 From: Seemann Date: Fri, 1 Mar 2024 11:33:35 -0500 Subject: [PATCH 108/216] Update main.yml (#84) --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3eaa4426..491db3a2 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -42,7 +42,7 @@ jobs: run: | @REM create output directory mkdir .output\Release\cleo - mkdir .output\Release\.config + mkdir .output\Release\cleo\.config mkdir .output\Release\cleo\cleo_modules mkdir .output\Release\cleo\cleo_plugins mkdir .output\Release\cleo\cleo_saves From 32e59cf65143562ec7e40da10cec55285eeb8e7d Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Fri, 1 Mar 2024 20:17:05 +0100 Subject: [PATCH 109/216] Fixed opcodes 0A8D, 2401, 2402 types invalid data type restrictions. (#85) --- cleo_plugins/MemoryOperations/MemoryOperations.cpp | 12 ++++++------ cleo_sdk/CLEO_Utils.h | 8 ++++++++ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/cleo_plugins/MemoryOperations/MemoryOperations.cpp b/cleo_plugins/MemoryOperations/MemoryOperations.cpp index b43179ac..bd3f2143 100644 --- a/cleo_plugins/MemoryOperations/MemoryOperations.cpp +++ b/cleo_plugins/MemoryOperations/MemoryOperations.cpp @@ -185,11 +185,11 @@ class MemoryOperations const void* source; auto paramType = thread->PeekDataType(); bool sourceText = false; - if (IsVariable(paramType)) + if (IsVariable(paramType) || IsVarString(paramType)) { source = CLEO_GetPointerToScriptVariable(thread); } - else if (IsImmString(paramType) || IsVarString(paramType)) + else if (IsImmString(paramType)) { static char buffer[MAX_STR_LEN]; @@ -286,7 +286,7 @@ class MemoryOperations memcpy(&value, address, size); } - OPCODE_WRITE_PARAM_UINT(value); + OPCODE_WRITE_PARAM_ANY32(value); return OR_CONTINUE; } @@ -662,7 
+662,7 @@ class MemoryOperations { if (size == 0) { - OPCODE_WRITE_PARAM_INT(0); + OPCODE_WRITE_PARAM_ANY32(0); return OR_CONTINUE; // done } @@ -674,7 +674,7 @@ class MemoryOperations } if (size > 0) memcpy(&result, (void*)(ptr + offset), size); - OPCODE_WRITE_PARAM_INT(result); + OPCODE_WRITE_PARAM_ANY32(result); return OR_CONTINUE; } else if (IsVarString(resultType)) @@ -716,7 +716,7 @@ class MemoryOperations size = sizeof(DWORD); } - auto value = OPCODE_READ_PARAM_INT(); + auto value = OPCODE_READ_PARAM_ANY32(); memcpy(ptr + offset, &value, size); return OR_CONTINUE; diff --git a/cleo_sdk/CLEO_Utils.h b/cleo_sdk/CLEO_Utils.h index 5fe37d42..6b839fdc 100644 --- a/cleo_sdk/CLEO_Utils.h +++ b/cleo_sdk/CLEO_Utils.h @@ -31,6 +31,7 @@ namespace CLEO OPCODE_READ_PARAM_INT() OPCODE_READ_PARAM_UINT() OPCODE_READ_PARAM_FLOAT() + OPCODE_READ_PARAM_ANY32() // get raw data of simple-type value (practically integers and floats) OPCODE_READ_PARAM_STRING(varName) // reads param and creates const char* variable named 'varName' with pointer to null-terminated string OPCODE_READ_PARAM_STRING_LEN(varName, maxLength) // same as above, but text length is clamped to maxLength OPCODE_READ_PARAM_FILEPATH(varName) // reads param and creates const char* variable named 'varName' with pointer to resolved, null-terminated, filepath @@ -51,6 +52,7 @@ namespace CLEO OPCODE_WRITE_PARAM_INT(value) OPCODE_WRITE_PARAM_UINT(value) OPCODE_WRITE_PARAM_FLOAT(value) + OPCODE_WRITE_PARAM_ANY32(value) // write raw data into simple-type variable (practically integers and floats) OPCODE_WRITE_PARAM_STRING(value) OPCODE_WRITE_PARAM_PTR(value) // memory address */ @@ -418,6 +420,9 @@ namespace CLEO #define OPCODE_READ_PARAM_FLOAT() _readParamFloat(thread).fParam; \ if (!IsLegacyScript(thread) && !_paramWasFloat()) { SHOW_ERROR("Input argument #%d expected to be float, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), 
CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + #define OPCODE_READ_PARAM_ANY32() _readParam(thread).dwParam; \ + if (!_paramWasInt() && !_paramWasFloat()) { SHOW_ERROR("Input argument #%d expected to be int or float, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + #define OPCODE_READ_PARAM_STRING(_varName) char _buff_##_varName[MAX_STR_LEN + 1]; const char* ##_varName = _readParamText(thread, _buff_##_varName, MAX_STR_LEN + 1); if(!_paramWasString()) { return OpcodeResult::OR_INTERRUPT; } #define OPCODE_READ_PARAM_STRING_LEN(_varName, _maxLen) char _buff_##_varName[_maxLen + 1]; const char* ##_varName = _readParamText(thread, _buff_##_varName, _maxLen + 1); if(##_varName != nullptr) ##_varName = _buff_##_varName; if(!_paramWasString()) { return OpcodeResult::OR_INTERRUPT; } @@ -474,6 +479,9 @@ namespace CLEO #define OPCODE_WRITE_PARAM_UINT(value) _writeParam(thread, value); \ if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be variable int, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + #define OPCODE_WRITE_PARAM_ANY32(value) _writeParam(thread, value); \ + if (!_paramWasInt(true) && !_paramWasFloat(true)) { SHOW_ERROR("Output argument #%d expected to be int or float variable, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + #define OPCODE_WRITE_PARAM_FLOAT(value) _writeParam(thread, value); \ if (!IsLegacyScript(thread) && !_paramWasFloat(true)) { SHOW_ERROR("Output argument #%d expected to be variable float, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), 
CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } From f76bea06c1eb4cc5caae9e395a51b29f0d708dfc Mon Sep 17 00:00:00 2001 From: Seemann Date: Sat, 2 Mar 2024 11:04:13 -0500 Subject: [PATCH 110/216] OPCODE_READ_PARAM_FILEPATH should return pointer to buffer with resolved file path (#86) Yes, I noticed that too. --- cleo_sdk/CLEO_Utils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cleo_sdk/CLEO_Utils.h b/cleo_sdk/CLEO_Utils.h index 6b839fdc..87e52e18 100644 --- a/cleo_sdk/CLEO_Utils.h +++ b/cleo_sdk/CLEO_Utils.h @@ -427,7 +427,7 @@ namespace CLEO #define OPCODE_READ_PARAM_STRING_LEN(_varName, _maxLen) char _buff_##_varName[_maxLen + 1]; const char* ##_varName = _readParamText(thread, _buff_##_varName, _maxLen + 1); if(##_varName != nullptr) ##_varName = _buff_##_varName; if(!_paramWasString()) { return OpcodeResult::OR_INTERRUPT; } - #define OPCODE_READ_PARAM_FILEPATH(_varName) char _buff_##_varName[512]; const char* ##_varName = _readParamText(thread, _buff_##_varName, 512); if(_paramWasString()) CLEO_ResolvePath(thread, _buff_##_varName, 512); else return OpcodeResult::OR_INTERRUPT; + #define OPCODE_READ_PARAM_FILEPATH(_varName) char _buff_##_varName[512]; _readParamText(thread, _buff_##_varName, 512); if(_paramWasString()) CLEO_ResolvePath(thread, _buff_##_varName, 512); else return OpcodeResult::OR_INTERRUPT; const char* _varName = _buff_##_varName; #define OPCODE_READ_PARAM_PTR() _readParam(thread).pParam; \ if (!_paramWasInt()) { SHOW_ERROR("Input argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ From 7d28bb853b25b8381752d668298ee5a3ab8237f2 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sat, 2 Mar 2024 17:17:44 +0100 Subject: [PATCH 111/216] Fixed OPCODE_READ_PARAM_FILEPATH macro. 
(#87) --- cleo_sdk/CLEO_Utils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cleo_sdk/CLEO_Utils.h b/cleo_sdk/CLEO_Utils.h index 87e52e18..79e36b0e 100644 --- a/cleo_sdk/CLEO_Utils.h +++ b/cleo_sdk/CLEO_Utils.h @@ -427,7 +427,7 @@ namespace CLEO #define OPCODE_READ_PARAM_STRING_LEN(_varName, _maxLen) char _buff_##_varName[_maxLen + 1]; const char* ##_varName = _readParamText(thread, _buff_##_varName, _maxLen + 1); if(##_varName != nullptr) ##_varName = _buff_##_varName; if(!_paramWasString()) { return OpcodeResult::OR_INTERRUPT; } - #define OPCODE_READ_PARAM_FILEPATH(_varName) char _buff_##_varName[512]; _readParamText(thread, _buff_##_varName, 512); if(_paramWasString()) CLEO_ResolvePath(thread, _buff_##_varName, 512); else return OpcodeResult::OR_INTERRUPT; const char* _varName = _buff_##_varName; + #define OPCODE_READ_PARAM_FILEPATH(_varName) char _buff_##_varName[512]; const char* ##_varName = _readParamText(thread, _buff_##_varName, 512); if(##_varName != nullptr) ##_varName = _buff_##_varName; if(_paramWasString()) CLEO_ResolvePath(thread, _buff_##_varName, 512); else return OpcodeResult::OR_INTERRUPT; #define OPCODE_READ_PARAM_PTR() _readParam(thread).pParam; \ if (!_paramWasInt()) { SHOW_ERROR("Input argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ From 27cb5dcc5a7e0a51444822b0fa3b2c6eaae01a77 Mon Sep 17 00:00:00 2001 From: Seemann Date: Sun, 3 Mar 2024 16:27:50 -0500 Subject: [PATCH 112/216] memory operation tests (#90) --- .gitignore | 3 ++- tests/cleo_tests/.Compile_All.bat | 22 +++++++++++++++++++ tests/cleo_tests/0AEE.s | Bin 505 -> 0 bytes tests/cleo_tests/Audio/0AAC.s | Bin 434 -> 0 bytes tests/cleo_tests/Audio/0AAD.s | Bin 299 -> 0 bytes tests/cleo_tests/Audio/0AAE.s | Bin 285 -> 0 bytes tests/cleo_tests/Audio/0AAF.s | Bin 417 -> 0 bytes 
tests/cleo_tests/Audio/0AB9.s | Bin 843 -> 0 bytes tests/cleo_tests/Audio/0ABB.s | Bin 426 -> 0 bytes tests/cleo_tests/Audio/0ABC.s | Bin 684 -> 0 bytes tests/cleo_tests/Audio/0AC0.s | Bin 802 -> 0 bytes tests/cleo_tests/Audio/0AC1.s | Bin 449 -> 0 bytes tests/cleo_tests/Audio/0AC2.s | Bin 1099 -> 0 bytes tests/cleo_tests/Audio/0AC4.s | Bin 644 -> 0 bytes tests/cleo_tests/FilesystemOperations/0A99.s | Bin 952 -> 0 bytes tests/cleo_tests/FilesystemOperations/0A9A.s | Bin 354 -> 0 bytes tests/cleo_tests/FilesystemOperations/0A9B.s | Bin 448 -> 0 bytes tests/cleo_tests/FilesystemOperations/0A9C.s | Bin 390 -> 0 bytes tests/cleo_tests/FilesystemOperations/0A9D.s | Bin 1246 -> 0 bytes tests/cleo_tests/MemoryOperations/0A8C.s | Bin 1910 -> 0 bytes tests/cleo_tests/MemoryOperations/0A8C.txt | 19 ++++++++++++++++ tests/cleo_tests/MemoryOperations/0A8D.s | Bin 1386 -> 0 bytes tests/cleo_tests/MemoryOperations/0A8D.txt | 15 ++++++++++++- tests/cleo_tests/MemoryOperations/0A96.s | Bin 205 -> 0 bytes tests/cleo_tests/MemoryOperations/0A97.s | Bin 240 -> 0 bytes tests/cleo_tests/MemoryOperations/0A98.s | Bin 242 -> 0 bytes tests/cleo_tests/MemoryOperations/0AA4.txt | 22 +++++++++++++++++++ tests/cleo_tests/MemoryOperations/0AC6.s | Bin 291 -> 0 bytes tests/cleo_tests/MemoryOperations/0AC7.s | Bin 257 -> 0 bytes tests/cleo_tests/MemoryOperations/0AC8.s | Bin 421 -> 0 bytes tests/cleo_tests/MemoryOperations/0AC9.s | Bin 225 -> 0 bytes tests/cleo_tests/MemoryOperations/0AEA.s | Bin 222 -> 0 bytes tests/cleo_tests/MemoryOperations/0AEB.s | Bin 255 -> 0 bytes tests/cleo_tests/MemoryOperations/2400.s | Bin 2443 -> 0 bytes tests/cleo_tests/MemoryOperations/2401.s | Bin 1965 -> 0 bytes tests/cleo_tests/MemoryOperations/2402.s | Bin 1968 -> 0 bytes tests/cleo_tests/MemoryOperations/2403.s | Bin 108 -> 0 bytes tests/cleo_tests/MemoryOperations/2404.s | Bin 255 -> 0 bytes 38 files changed, 79 insertions(+), 2 deletions(-) create mode 100644 tests/cleo_tests/.Compile_All.bat delete mode 
100644 tests/cleo_tests/0AEE.s delete mode 100644 tests/cleo_tests/Audio/0AAC.s delete mode 100644 tests/cleo_tests/Audio/0AAD.s delete mode 100644 tests/cleo_tests/Audio/0AAE.s delete mode 100644 tests/cleo_tests/Audio/0AAF.s delete mode 100644 tests/cleo_tests/Audio/0AB9.s delete mode 100644 tests/cleo_tests/Audio/0ABB.s delete mode 100644 tests/cleo_tests/Audio/0ABC.s delete mode 100644 tests/cleo_tests/Audio/0AC0.s delete mode 100644 tests/cleo_tests/Audio/0AC1.s delete mode 100644 tests/cleo_tests/Audio/0AC2.s delete mode 100644 tests/cleo_tests/Audio/0AC4.s delete mode 100644 tests/cleo_tests/FilesystemOperations/0A99.s delete mode 100644 tests/cleo_tests/FilesystemOperations/0A9A.s delete mode 100644 tests/cleo_tests/FilesystemOperations/0A9B.s delete mode 100644 tests/cleo_tests/FilesystemOperations/0A9C.s delete mode 100644 tests/cleo_tests/FilesystemOperations/0A9D.s delete mode 100644 tests/cleo_tests/MemoryOperations/0A8C.s delete mode 100644 tests/cleo_tests/MemoryOperations/0A8D.s delete mode 100644 tests/cleo_tests/MemoryOperations/0A96.s delete mode 100644 tests/cleo_tests/MemoryOperations/0A97.s delete mode 100644 tests/cleo_tests/MemoryOperations/0A98.s create mode 100644 tests/cleo_tests/MemoryOperations/0AA4.txt delete mode 100644 tests/cleo_tests/MemoryOperations/0AC6.s delete mode 100644 tests/cleo_tests/MemoryOperations/0AC7.s delete mode 100644 tests/cleo_tests/MemoryOperations/0AC8.s delete mode 100644 tests/cleo_tests/MemoryOperations/0AC9.s delete mode 100644 tests/cleo_tests/MemoryOperations/0AEA.s delete mode 100644 tests/cleo_tests/MemoryOperations/0AEB.s delete mode 100644 tests/cleo_tests/MemoryOperations/2400.s delete mode 100644 tests/cleo_tests/MemoryOperations/2401.s delete mode 100644 tests/cleo_tests/MemoryOperations/2402.s delete mode 100644 tests/cleo_tests/MemoryOperations/2403.s delete mode 100644 tests/cleo_tests/MemoryOperations/2404.s diff --git a/.gitignore b/.gitignore index 47a59810..7f88dfcf 100644 --- a/.gitignore 
+++ b/.gitignore @@ -41,4 +41,5 @@ ipch/ .vs/ *.zip *.lib -node_modules/ \ No newline at end of file +node_modules/ +tests/**/*.s \ No newline at end of file diff --git a/tests/cleo_tests/.Compile_All.bat b/tests/cleo_tests/.Compile_All.bat new file mode 100644 index 00000000..31766810 --- /dev/null +++ b/tests/cleo_tests/.Compile_All.bat @@ -0,0 +1,22 @@ +@REM Compile all .txt files in the current directory to .s files using Sanny Builder 4 +@REM Usage: set SANNY="path\to\sanny.exe" && .Compile_All.bat + +@echo off +SETLOCAL EnableDelayedExpansion + +@REM Delete all .s files in the current directory and subdirectories +for /f "delims=" %%i in ('dir /b /s *.s') do ( + set p=%%i + echo Deleting !p:%__CD__%=!... + del "%%i" +) + +@REM Compile all .txt files in the current directory and subdirectories +for /f "delims=" %%i in ('dir /b /s *.txt') do ( + set p=%%i + echo Compiling !p:%__CD__%=!... + %SANNY% --compile "%%i" "%%~dpni.s" --no-splash --mode sa_sbl +) + +echo Done. +pause diff --git a/tests/cleo_tests/0AEE.s b/tests/cleo_tests/0AEE.s deleted file mode 100644 index 2af6eff5891d6815e431811c27407e94e260539c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 505 zcmZ3&%*SHj=<0fyfl-l<%fQjnQ9+|1zg&}nk%5Kb9Tyt|gM$N*XmDU=V7LYpa%KQB zfC7u0d>I(0{{R0Us7}%o0#$jMJkiBBv|$;^*0E-6Y)%++LIWME;q2E=Q)_=NOg zGV{t3b23vD@)C1X^>Pc0nHd;-85q6)|NqaR$QNE$R9BY)MYs&qQBXEeaC7wZadlCw z%d7Lt&r?XP$Sf|&%u82D%gjkt0J$MG#a_W9F)t-2)k;A%1!xHa69Xd?&=#OmUFy<_ zu_C}RIM~%iL8CmgBm-zeL27bIYKlU6Vo@H@CKPXR>BYDJ#Sq?E^$%>34lx!PDnKj( lIWPdI4;TzVLhT^N`c7$tz3 z85H@#>x!V7iPG=p=;`C?qF9$#=a!h6lbWJXlCO}HpO~VM2IT1}cqHbfho1>?XtBYb?UY%QFW=?8~LP@?tPJUvFLK={#r{Iy8my(lerJ$MubQ{>! Q>$rd{;+NKY+^Pb^Kz%#SZFDN0Sujn7HVOE1aLWME`qVYmjw zYq4>ZE4F1@ZU1BCFKrK6y1pb+2~9PH}Cz{J2P z3e?S@$QMyp1l3KP0d9_-KCUi`b$NAeiJ3X6DGDX|3OV_SDGF&oo}Pk7VqQv4s+EFj z3eb&U*RSUSvX~jczOV!_SU_&y`Hxh$8zQ+q5vUvJ_5hOHZm588yMcmhML}wENooqv VBdRG1{>jOuMX4!3*D*}y0stUlgUA2? 
diff --git a/tests/cleo_tests/Audio/0AB9.s b/tests/cleo_tests/Audio/0AB9.s deleted file mode 100644 index 6dd24a656d88ed2e25420cd2950f094b821409f9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 843 zcmZ3&%*SHj=wx}Afl-l97Q|A}NKY+^Pb^Kz%#SZFDN0Su1yYG6shSLo3@i-SfOriT zAD3Q?OJ-iWUT%RgGXsM!17rFB|Nns|7}llN)n$MXUZZprlnoRD9D{>hT^N`c7$tz3 z85H@#>x!V7iPG=p=;`C?qF9$#=a!h6lbWJXlCO}HpO~VM2IT1}cqHbfb)NOZX&lFO5Unt?74AjRc|3J900rYN{p6r?7Xq^1D< z0p$57Czlqbrm*}6IuGpVwID~cFrtO6qlU*T?^o=xbw` qnZWqU1!^YM*T%&7ni1k_Rvku6SN{jPT96V~Z~sFhSA*kfG8X`OGzX9X diff --git a/tests/cleo_tests/Audio/0ABB.s b/tests/cleo_tests/Audio/0ABB.s deleted file mode 100644 index 2fb78156abf693ffbcf645915206b771dc7ed1e2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 426 zcmZ3&%*SHj=;U;mfl-l94#ZN>NKY+^Pb^Kz%#SZFDN0SujW5g3Da}pQWME`qVYmjw zYq4>ZE4F1@ZU1BCFKrK6y1pb+2~9PH}Cz{J2P z3e?S@$QMyp1l3KP0d9_-KCUi`b$NAeiJ3X6DGDX|3OV_SDGF&oo}Pk7VqQv4s+EFj z3eb&U*YD;6vX~jczHkOH*cccZ?19eT_K#HO8zMPB0;n74{6LbNZ>WH9zG|9+Yehk7 ba!G0m&@(`ue{ynZQECdC{8VjPb^Kz%#SZFDN0SujW5g3Da}pQWME`qVYmjw zYq4>ZE4F1@ZU1BCFKrK6y1pb+2~9PH}Cz{J2P z3e?S@$QMyp1l3KP0d9_-KCUi`b$NAeiJ3X6DGDX|3OV_SDGF&oo}Pk7VqQv4s+EFj z3eb&U*YD;6vX~jczHkOH*cccZ?19eT_K#HO8zMPB0;n74{6LbNZ>WH9zG|9+Yehk7 za!G0m&@(`ue{ynZQECdG6d{EAuyQ>0AJ12rvLx| diff --git a/tests/cleo_tests/Audio/0AC0.s b/tests/cleo_tests/Audio/0AC0.s deleted file mode 100644 index 704ce66d5d0f5a290449e4d6a341db3532856fb6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 802 zcmZ3&%*SHj=xlJ9fl-l94#ZN>C{8VjPb^Kz%#SZFDN0SujnB!?FGx+%WME`qVYmjw zYq4>ZE4F1@ZU1BCFKrK6y1pb+2~9PH}Cz{J2P z3e?S@$QMyp1l3KP0d9_-KCUi`b$NAeiJ3X6DGDX|3OV_SDGF&oo}Pk7VqQv4s+EFj z3eb&U*B{^lvRD{NbEqNAp&2O43xY{NSmfGH%{gP0Bvse5&!@I diff --git a/tests/cleo_tests/Audio/0AC1.s b/tests/cleo_tests/Audio/0AC1.s deleted file mode 100644 index 22bc4b7fd8babbbdc54e1e844daf6889f88b08e2..0000000000000000000000000000000000000000 GIT binary patch 
literal 0 HcmV?d00001 literal 449 zcmZ3&%*SHj=xlhHfl-l98pKl2$jMJki8oG(Pb^Kz%#SZFDN0Su)ns5~U}3lh#0R|U2nq%8rC!NIOB3L52^B^f|V3R06xQd1Pl6N~bImZ5l#OE1O+D2DLVJcwQTMA~Jj p0I>_?Oh=$rV0eTQA0CEC;o+7D41*Mfl6-JDfI~zNEhr{)0Ra0NkA?sM diff --git a/tests/cleo_tests/Audio/0AC2.s b/tests/cleo_tests/Audio/0AC2.s deleted file mode 100644 index 9d9f7a4547059055ba65d99987027e1f91d2febe..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1099 zcmdUt!Aiq07{|Y?orfZQ0)-S2oQ$!po&*oYZEi4OxE+NMnz$9ZX0#FX(wFey3wZSn zyiC1&^GQ5;uqNncF9WT3FeKrh%m4d+|1UR&ZPvZR<^v$du4sjK5-!Osq?g2Hgr2iN zkVM9uP6?I77b0d!1rV6U_yG zEtl3c$F(Y7n4^*7*!|$(ML8Pyi({Y$!(;4EXiR;n6caqbk&u{Q1c^+X1<9kSDAoY* zTwS0QfPug$fnn%j7aLD=fu%pdy`_r%-qX!{K3Vogzug&;6{}oDrU`etvWD|+wK5Tc e@@sLby?h7D;Jp79+~$hR$Vu<0JNnz?uB>S6ov1!$tFmi;RX>EN@yEbx(ZE7g$ki{BM8Hg8QVa9N-~0O@)EAyx%DAL z_wIZaabcW6qb@aYFPzz&bH2F%cYWI!`jdl4AdbDO74B6+(oM>)smCe1 zCTYB8hjCxpH5T%cpe5$o!3*Aq=KU?FfACz3-xkw#&VvDL)_u2^*2vu_O>*`tqSkR zF#5$X{$z-|F^uqsAyVfMCFpRN diff --git a/tests/cleo_tests/FilesystemOperations/0A99.s b/tests/cleo_tests/FilesystemOperations/0A99.s deleted file mode 100644 index 44f9c7396a2b3697eca5c2fe3d755a1edd865a73..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 952 zcmbW0ze~eF6vy8+Nrw#i2XweniV7-ew|0?WTM>Uk?IM9v)58X8TJFxFOHls>C!Gan z{{|ObRs0X!y1Ak0-D#F?JLU2tkes!sEV?Y1Ya@qzaI$L1F{rF5vF8^r(xJtY#DW8CrP5OQ90(YW=h_{#~N&!wFi5;s)f+qf<`*8_+ fMvnt1#>3QODJx4?+9qnVu}83@_y0W1MQ#2C2A&IA diff --git a/tests/cleo_tests/FilesystemOperations/0A9A.s b/tests/cleo_tests/FilesystemOperations/0A9A.s deleted file mode 100644 index d74a59dfddd12f95df2a3a8a92a80ac0cc210c0b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 354 zcmZ3&%*SHjXz6&Efl-l90K`(z$S+9Ei%-kUN!4UvWME;q2E?x}E2^u@fFg9mbQF{g6x75ssAq^2n3 z<>x7+R%8~JWagzSfULAl2DnQMf%?0!l&=CuOnt)!iCEz7PxR=}#f$mRHD9Kj<1t8crdJL1f01B&Y!T(cA$GC&B!7##&=1BC#`;9yr51||kZ7oY|PMLxT_ 
zBB%zOdfXg6eOz4>72Fbmu1QfS$ydlPNX=6KxlIr3w%J@jHxRVk5N0_s-r4bw2=5rd YOaywT6sUn1?-(gi$veg{%O`UI0La*b)c^nh diff --git a/tests/cleo_tests/FilesystemOperations/0A9C.s b/tests/cleo_tests/FilesystemOperations/0A9C.s deleted file mode 100644 index 260fef28cc492369de6ad699eda61316e95d8026..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 390 zcmZ3&%*SHjXz6^Ifl-l91juq!&`3`$iBHSSNsTYgtV-2nU}RunxCX?txcE4db5is5 z5{onW7>k%07m5$K{6g_3-Q{DRax1(5snz)qXP1#}5BBiMk%07;b^)8-)@A z+x`+60!Bz7U<1@dYzP?P3IS6`6Bn{WU^Xa{iAsmYFn8lk*6aTe84Pgafaxv+sEOEQ zZ43(ra+5VMfwKV9GbD90ae$Jz0=5*69>G_C6X`P(SU>@N77f%yYy_LYe1@LH=@h}H JFn3Sp0sz5gbTj|} diff --git a/tests/cleo_tests/MemoryOperations/0A8C.s b/tests/cleo_tests/MemoryOperations/0A8C.s deleted file mode 100644 index 750df0f1490d6b1626d8a39019069b26344dda0b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1910 zcmciCJ4?e*6ae6Jlc>-kAZ|{#iZAGpS4-_61aAdJQRoj4NrP6bN@%ez`32n^+{M`+ z;40|iELGqg!^VRAhkfc=!qeNUQt4#~+D#YQ4*b!u)It7f!(gZ|t$9jETOW$Za@ADjED1&Og>XxC%0gpCPU=n;$CPf-h<4z#eW zL@cB@3-gzSfsF}R7!ix>4^azakcDX`WFf~{FxER4v)Gt`g&DE9dKa}Y`&(G8NMtUQ zI1Ah)J!fPR8xydwA{G~Kq83&^3tcyi#O7io&Vn)4wqUqxad)cNn1Dq#VsZZ3vZxH- zo!O2>cRdzpXNP8Jdps;F!zTofjqTLxI|qfx_6+yv|5xXj7zJY?g^Ya(P~;*M%@wYz)kd z42)-h0LWruV7vSBx%4h}$H5e^3qaz6^|I|Ns97nxs{iURRd^LfDPeQBXEe2yhGzc6DK3Vqi1{ m>S9pjGp;Lw>cX$l&C%1x)kU!`uTC`uXg4zx12fP$leqvIoHYml diff --git a/tests/cleo_tests/MemoryOperations/0A97.s b/tests/cleo_tests/MemoryOperations/0A97.s deleted file mode 100644 index 18033d4f5f6cdd3abef66f6c10b2bdf7b6d4e969..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 240 zcmZ3&%*SHjXlZ_!fl-l93dB;-NKY+^FH6nHOwLJ-FUZf#D@iTVWME`qVQ^<+oxs@5 zyp#b*u`vJvf?x)5m_drBa{<{v)-@no62xF+03u%o#_<3D{{t=2t4pt|%K#zVw&^G+ z8z=-g1_!&kFfcJN3ITO8DDs)t6+v|p*6il!>Er66SeI9)ngVnKGZOnY0Le%@ Ai~s-t diff --git a/tests/cleo_tests/MemoryOperations/0A98.s b/tests/cleo_tests/MemoryOperations/0A98.s deleted file mode 100644 
index e6ca9510aea852bea0475d21095a1b62b02689f9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 242 zcmZ3&%*SHjXlZenfl-l962wx_NKY+^&rixqO)iNq$j{6xNiEW3U}RunaA#umW$b2V zX9QAg3_yS&m_Zz7kfIq}KsJzdgb_$z0}4ojc#I4{2-A(AcWIK z9R+0rg#gFkU{@CgCI&_UphgBoKC`+as78W%-5foATwN6F^6FGmfG%KWVqgZkXfhW7 D%9T0u diff --git a/tests/cleo_tests/MemoryOperations/0AA4.txt b/tests/cleo_tests/MemoryOperations/0AA4.txt new file mode 100644 index 00000000..fe3111f3 --- /dev/null +++ b/tests/cleo_tests/MemoryOperations/0AA4.txt @@ -0,0 +1,22 @@ +{$CLEO .s} + +script_name "0AA4" +debug_on + +trace "0AA4 get_dynamic_library_procedure" +wait 0 + +int load_library_addr = read_memory 0x858070 4 false +trace "Address of LoadLibrary function is %d" load_library_addr + +int kernel_dll_addr = call_function_return {address} load_library_addr {numParams} 1 {pop} 0 {funcParams} "kernel32.dll" // tested opcode +if + // lib address can be any valid pointer, not necessarily one loaded with 0AA2 opcode + int sleep_addr = get_dynamic_library_procedure {procName} "Sleep" {DynamicLibrary} kernel_dll_addr +then + trace "~g~~h~~h~0AA4 (get_dynamic_library_procedure), #0 PASSED" +else + breakpoint "~r~~h~~h~~h~0AA4 (get_dynamic_library_procedure), #0 FAILED! 
Can't find procedure" +end + +terminate_this_custom_script diff --git a/tests/cleo_tests/MemoryOperations/0AC6.s b/tests/cleo_tests/MemoryOperations/0AC6.s deleted file mode 100644 index 0995cd97d1add166a62f03f679195ca883f890e2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 291 zcmZ3&%*SHj=xlbFfl-l90>o0#NKY+^&q+*5&519_&&(@HEz)FQWME-nV_*hR$G8}O z{rmqP$OZ~9F)*G1f?h6=6blPT;2Hx9qa;WvBLfgwg4MVfD+&5CFoysC{~u_LPF;Fk zT?Po@vQI}r*+3z{F*w-Og#l>AAE12x7#Pn0K`$4O%fi9}61c{|!YB!nW&|Niuo{pqUk1jg|Ns93ZPBVrudB-d zA?()aC@32!1ULoA@tSf@*!mrWI(bLD(MX@fg&d|`v$k^D#M8UP9 dAT_xpH3cZ9YG6^J;GdjaT9ldsbS%SUE&z)`LxcbT diff --git a/tests/cleo_tests/MemoryOperations/0AC8.s b/tests/cleo_tests/MemoryOperations/0AC8.s deleted file mode 100644 index 89f67acb46d38c6134569feb363bad7faaf7ed6c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 421 zcmZ3&%*SHj=xlMAfl-l948&5v3D8(`AD42*^U|NjRXr&X6;SC;`o*v-{ZP&QBqa10K1bzxv) zV3YvrVo>A@tSf@*!mrWI(bLD(MX@fg&d|`v$k^D#M8UP9AT_xpH3cZ9YG6^J;Gdja zT9lf?%mi{F*b6{w&HzC#7tpmVEG!H_DWE?XEI|ww2B6co{Ug@thDc721?nQ&>4pjj NryD>4KBrIS0syfFcLD$a diff --git a/tests/cleo_tests/MemoryOperations/0AC9.s b/tests/cleo_tests/MemoryOperations/0AC9.s deleted file mode 100644 index 71d15d339a30ad513106f68f82d13f99286ed9ef..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 225 zcmZ3&%*SHj=xTYGfl-l90K`(zD9A5}Ps_rVPs(h@)%xnF*7q<1FCjr05aGZ7_^*x85r~a|Njp(SEVk!t}X+F z&@IqWP&QBqa10K1bzxuvIsvGjL6Oh3t_Z3gmliiiPajto#k#yYkh>IID+*GROHxyS VeAP4s|K#M-qSO?iI~XQ&0RS7~I_LlZ diff --git a/tests/cleo_tests/MemoryOperations/0AEA.s b/tests/cleo_tests/MemoryOperations/0AEA.s deleted file mode 100644 index ca34b8d6e8532dfcdb438235efc830449d5e1216..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 222 zcmZ3&%*SHj=<0Zwfl-l92*gs*NKY+^FGx*^FG@|*WME`qVff0(!obYHz{bD~;!WcM zvVp8uTtI@EiQyVh%o-#QWcxBOR{a0}A83|ZU3y(z1_)sqs-vK6pb+2~9PH}Cz{J3) 
z0@T5v$mdyC1l56ClbfTbkE@GfU0$84fklOaYehk7a!G0mjPIYETw0Wx0&xq&WG(;; CVmea* diff --git a/tests/cleo_tests/MemoryOperations/0AEB.s b/tests/cleo_tests/MemoryOperations/0AEB.s deleted file mode 100644 index 8687de188bc38cc418bf275c43b56e029dd4a3d3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 255 zcmZ3&%*SHj=<0Nsfl-l948&5C&fa{&ovCWga|K>jtLHfsiu1QUZV17r06|NnuOXw{|H)n$MX z&^AX*+jJC^4HN<#gM(dN7?>CsnSr_(6#0DXilDmiYjkt;^l^1jtjnuYHL$2qaIGjv XO)g1If${y5lS_+IQy^|)n9KzLYIQ-A diff --git a/tests/cleo_tests/MemoryOperations/2400.s b/tests/cleo_tests/MemoryOperations/2400.s deleted file mode 100644 index b4b806e78e290029541247ab977123857a493b71..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2443 zcmd7UJ4?e*6bJBgo7)N?n{f7IsM>N3E?5>{38(v0xJ8n51wGlrHx2 z*zr7eJdYjEW5@HWK$!3QxW;L3+w-cy^?kAPY(i%1rAno20WR?H&G#XfIq;f1@q1{g zw7gQba^8+-^J0pks+y+j%;Cih!!!$pVv)JLSh?MB?OoTw+>~5wv!8Bjw!PPM9TW=? zX|Orm)qlGZQCAD!OP}k-tB9)-a$UfLkgLKXxT16nT@%8t+85Wxi-@Zla=m#LbXCP& zKfd4?x=P}%`Tp;99F+#o0WIXZ|0L+DiMsO15JOiQ!8O}A2TmVFT=kIa3?_u;fG+0R zo47G_O-6U^;<<-3z)JL%BLmjLxl*AzZuNz;zUr2G0RA-HU?%! 
z2F5c$0Aw*SFx~|MAd8uS@f{E_ssNR+u&{unt}(DMS;AEVSxi8KfD}}zF9W0h|NsAi zma5gI*VSc!5T@-q3d#lw0gl1Jt}YBf+iv{(|DQpTFSD))sspzsH%Ct&R~N;)yt-s4 zP%wZ3g;Xd|aIGjvO)g1I0jg3pu&7W#5dO)@rA4VJK+gi*4RkYE!N7GZ{7+s8&P&0-Z&|ic@!Wbze_Wvd_B#g0#gpmBrNe4mZs2i zZh^Ckm79ry-x&%RfWQxkfmI3%a+SitA_7!~TBWQ5>nzUCO;yOr&x7d%CLgFyU=8yY zSZNVXK4wVCrwpirs2au$TL`JcT%(Ssa4m*ZGl&`n=rf?3876ael&6IP3j;xzDe0+2 Q3MHwYmJH+>B~77=q9{PDf);uNMVkNdM~nNT#r-#c ztT~Q@ACA~1+unDt9vRHi$vE9!%I9-gz!(pojsxM~foGS4S6VCvS|OQ^GdkO*cCr^8=t*-3c- diff --git a/tests/cleo_tests/MemoryOperations/2403.s b/tests/cleo_tests/MemoryOperations/2403.s deleted file mode 100644 index 6c99814fc2adf79c629e12d49b357dab1be80b8e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 108 zcmZ3&%*SG6Vqkojfl-l91jJI%NXsuuPc4bhP0h_Os?=m)WME-nV_;@rV01B75*_K<2%BCV1!V(;0LS29R~LrKTmS)L8Cw7V diff --git a/tests/cleo_tests/MemoryOperations/2404.s b/tests/cleo_tests/MemoryOperations/2404.s deleted file mode 100644 index 5e6e5eebd57ee495fcf3ca09b6a56e5a5da41ee1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 255 zcmZ3&%*SG6VqkKZfl-l96~t1|NKY+^FHSDXEC5m^MWx9l@mZzCCGp8csfi`2DVhu{ zD$EQF^SPKA8Lly~FjzAHc|f)=17p_z|Nnu;Sk|T2)n$MXQRe9=C>tmQI0gs1x-c*? zFfs!TVNm3YsVjmSLXtUdj-EcQE{b(|b*csy6$-8u1*yqEN5lC3$;qWfsVP9`0v*jT GnF|0jxlk(r From a77505c29c2268e122edc3af9ac689e86eb86753 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Mon, 4 Mar 2024 16:31:28 +0100 Subject: [PATCH 113/216] Minor fixes in Audio plugin. Fixed test scripts. (#91) * Minor fixes in Audio plugin. Fixed test scripts. * fixup! Minor fixes in Audio plugin. Fixed test scripts. 
--- cleo_plugins/Audio/Audio.cpp | 18 +++++++++++++----- cleo_plugins/Audio/CAudioStream.cpp | 6 ++++-- tests/cleo_tests/Audio/0AB9.txt | 25 +++++++++++++------------ tests/cleo_tests/Audio/0AC0.txt | 9 +++++---- tests/cleo_tests/Audio/0AC2.txt | 5 +++-- tests/cleo_tests/Audio/0AC4.txt | 3 ++- 6 files changed, 40 insertions(+), 26 deletions(-) diff --git a/cleo_plugins/Audio/Audio.cpp b/cleo_plugins/Audio/Audio.cpp index e7810ffd..fafc2df2 100644 --- a/cleo_plugins/Audio/Audio.cpp +++ b/cleo_plugins/Audio/Audio.cpp @@ -15,6 +15,14 @@ class Audio public: static CSoundSystem soundSystem; + enum eStreamAction + { + Stop, + Play, + Pause, + Resume, + }; + Audio() { auto cleoVer = CLEO_GetVersion(); @@ -113,12 +121,12 @@ class Audio { switch (action) { - case 0: stream->Stop(); break; - case 1: stream->Play(); break; - case 2: stream->Pause(); break; - case 3: stream->Resume(); break; + case eStreamAction::Stop: stream->Stop(); break; + case eStreamAction::Play: stream->Play(); break; + case eStreamAction::Pause: stream->Pause(); break; + case eStreamAction::Resume: stream->Resume(); break; default: - LOG_WARNING(thread, "Unknown audiostream's action (%d) in script %s", action, ScriptInfoStr(thread).c_str()); + LOG_WARNING(thread, "Unknown AudioStreamAction (%d) in script %s", action, ScriptInfoStr(thread).c_str()); } } diff --git a/cleo_plugins/Audio/CAudioStream.cpp b/cleo_plugins/Audio/CAudioStream.cpp index 24c46b2b..d29a23bf 100644 --- a/cleo_plugins/Audio/CAudioStream.cpp +++ b/cleo_plugins/Audio/CAudioStream.cpp @@ -70,8 +70,10 @@ void CAudioStream::SetProgress(float value) float CAudioStream::GetProgress() const { - auto total = BASS_ChannelGetLength(streamInternal, BASS_POS_BYTE); - auto bytePos = BASS_ChannelGetPosition(streamInternal, BASS_POS_BYTE); + auto total = BASS_ChannelGetLength(streamInternal, BASS_POS_BYTE); // returns -1 on error + auto bytePos = BASS_ChannelGetPosition(streamInternal, BASS_POS_BYTE); // returns -1 on error + + if (bytePos == -1) 
bytePos = 0; // error or not available yet float progress = (float)bytePos / total; progress = std::clamp(progress, 0.0f, 1.0f); diff --git a/tests/cleo_tests/Audio/0AB9.txt b/tests/cleo_tests/Audio/0AB9.txt index 701c450b..bf95d38d 100644 --- a/tests/cleo_tests/Audio/0AB9.txt +++ b/tests/cleo_tests/Audio/0AB9.txt @@ -1,6 +1,7 @@ {$CLEO .s} {$USE debug} {$USE memory} +{$USE audio} var 0@ : Integer var 1@ : Integer var 2@ : Integer @@ -23,7 +24,7 @@ trace "0AB9 (get_audio_stream_state)" // load the file wait 0 if - load_audio_stream ".\Ding.mp3" store_to 0@ + load_audio_stream ".\Ding.mp3" {store_to} 0@ then trace "~g~~h~~h~0AB9 (get_audio_stream_state), #0 PASSED" else @@ -33,27 +34,27 @@ end // get state wait 0 -0AB9: get_audio_stream_state 0@ store_to 1@ // tested opcode +0AB9: get_audio_stream_state 0@ {store_to} 1@ // tested opcode if - 1@ == 2 // paused by default + 1@ == 2 // AudioStreamState.Paused then trace "~g~~h~~h~0AB9 (get_audio_stream_state), #1 PASSED" else - breakpoint "~r~~h~~h~~h~0AB9 (get_audio_stream_state), #1 FAILED!~n~%d Expected~n~%d Occured" -1 1@ + breakpoint "~r~~h~~h~~h~0AB9 (get_audio_stream_state), #1 FAILED!~n~%d Expected~n~%d Occured" 2 1@ end // set new state wait 0 -set_audio_stream_state 0@ state AudioStreamState.Play +set_audio_stream_state 0@ {state} AudioStreamAction.Play trace "~g~~h~~h~0AAD (set_audio_stream_state), #2 PASSED" // get updated state wait 0 -0AB9: get_audio_stream_state 0@ store_to 1@ // tested opcode +0AB9: get_audio_stream_state 0@ {store_to} 1@ // tested opcode if - 1@ == 1 // play + 1@ == 1 // AudioStreamState.Playing then trace "~g~~h~~h~0AB9 (get_audio_stream_state), #3 PASSED" else @@ -62,14 +63,14 @@ end // check if state updated after playback end -wait 300 // Ding.mp3 is 0.25s long -0AB9: get_audio_stream_state 0@ store_to 1@ // tested opcode +wait 400 // Ding.mp3 is 0.25s long +0AB9: get_audio_stream_state 0@ {store_to} 1@ // tested opcode if - 1@ == -1 // AudioStreamState.Stop + 1@ == -1 // 
AudioStreamState.Stopped then - trace "~g~~h~~h~0AB9 (get_audio_stream_state), #3 PASSED" + trace "~g~~h~~h~0AB9 (get_audio_stream_state), #4 PASSED" else - breakpoint "~r~~h~~h~~h~0AB9 (get_audio_stream_state), #3 FAILED!~n~%d Expected~n~%d Occured" -1 1@ + breakpoint "~r~~h~~h~~h~0AB9 (get_audio_stream_state), #4 FAILED!~n~%d Expected~n~%d Occured" -1 1@ end diff --git a/tests/cleo_tests/Audio/0AC0.txt b/tests/cleo_tests/Audio/0AC0.txt index d72caf20..327aeae9 100644 --- a/tests/cleo_tests/Audio/0AC0.txt +++ b/tests/cleo_tests/Audio/0AC0.txt @@ -1,6 +1,7 @@ {$CLEO .s} {$USE debug} {$USE memory} +{$USE audio} var 0@ : Integer var 1@ : Integer var 2@ : Integer @@ -39,7 +40,7 @@ trace "~g~~h~~h~0AC0 (set_audio_stream_looped), #1 PASSED" // start playback wait 0 -set_audio_stream_state 0@ state AudioStreamState.Play +set_audio_stream_state 0@ state AudioStreamAction.Play trace "~g~~h~~h~0AC0 (set_audio_stream_looped), #2 PASSED" @@ -47,7 +48,7 @@ trace "~g~~h~~h~0AC0 (set_audio_stream_looped), #2 PASSED" wait 0 get_audio_stream_state 0@ store_to 1@ if - 1@ == 1 // play + 1@ == 1 // AudioStreamState.Playing then trace "~g~~h~~h~0AC0 (set_audio_stream_looped), #3 PASSED" else @@ -59,7 +60,7 @@ end wait 400 // Ding.mp3 is 0.25s long get_audio_stream_state 0@ store_to 1@ if - 1@ == 1 // play + 1@ == 1 // AudioStreamState.Playing then trace "~g~~h~~h~0AC0 (set_audio_stream_looped), #4 PASSED" else @@ -69,7 +70,7 @@ end // stop playback wait 0 -set_audio_stream_state 0@ state AudioStreamState.Stop +set_audio_stream_state 0@ state AudioStreamAction.Stop trace "~g~~h~~h~0AC0 (set_audio_stream_looped), #5 PASSED" diff --git a/tests/cleo_tests/Audio/0AC2.txt b/tests/cleo_tests/Audio/0AC2.txt index 3920ec87..35cb2c51 100644 --- a/tests/cleo_tests/Audio/0AC2.txt +++ b/tests/cleo_tests/Audio/0AC2.txt @@ -1,6 +1,7 @@ {$CLEO .s} {$USE debug} {$USE memory} +{$USE audio} var 0@ : Integer var 1@ : Integer var 2@ : Integer @@ -57,7 +58,7 @@ trace "~g~~h~~h~0AC2 
(set_play_3d_audio_stream_at_coords), #3 PASSED" wait 0 print_big_formatted "LEFT________________________" {time} 300 {style} TextStyle.MiddleSmaller set_audio_stream_volume 0@ volume 10.0 -set_audio_stream_state 0@ state AudioStreamState.Play +set_audio_stream_state 0@ state AudioStreamAction.Play wait 250 trace "~g~~h~~h~0AC2 (set_play_3d_audio_stream_at_coords), #4 PASSED" @@ -78,7 +79,7 @@ trace "~g~~h~~h~0AC2 (set_play_3d_audio_stream_at_coords), #3 PASSED" wait 0 print_big_formatted "________________________RIGHT" {time} 300 {style} TextStyle.MiddleSmaller set_audio_stream_volume 0@ volume 10.0 -set_audio_stream_state 0@ state AudioStreamState.Play +set_audio_stream_state 0@ state AudioStreamAction.Play wait 250 trace "~g~~h~~h~0AC2 (set_play_3d_audio_stream_at_coords), #4 PASSED" diff --git a/tests/cleo_tests/Audio/0AC4.txt b/tests/cleo_tests/Audio/0AC4.txt index 6c757c0b..273a5a1a 100644 --- a/tests/cleo_tests/Audio/0AC4.txt +++ b/tests/cleo_tests/Audio/0AC4.txt @@ -4,6 +4,7 @@ nop {$CLEO .s} {$USE debug} {$USE memory} +{$USE audio} var 0@ : Integer var 1@ : Integer var 2@ : Integer @@ -53,7 +54,7 @@ trace "~g~~h~~h~0AC4 (set_play_3d_audio_stream_at_char), #2 PASSED" // play the sound wait 0 print_big_formatted "AT PLAYER CHAR" {time} 300 {style} TextStyle.MiddleSmaller -set_audio_stream_state 0@ state AudioStreamState.Play +set_audio_stream_state 0@ state AudioStreamAction.Play wait 250 trace "~g~~h~~h~0AC4 (set_play_3d_audio_stream_at_char), #3 PASSED" From c23720bbb9948649b3be49297388dc05d08d4410 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Tue, 5 Mar 2024 00:40:06 +0100 Subject: [PATCH 114/216] Fixed handled opcode parameters counting. Bug fixes. (#93) * Fixed handled opcode parameters counting. Bug fixes. * fixup! Fixed handled opcode parameters counting. Bug fixes. 
--- .../FileSystemOperations.cpp | 4 +-- cleo_plugins/IntOperations/IntOperations.cpp | 18 ++++++------- cleo_sdk/CLEO_Utils.h | 25 ++++++++++--------- source/CScriptEngine.cpp | 13 +++++++++- source/CScriptEngine.h | 6 +---- 5 files changed, 37 insertions(+), 29 deletions(-) diff --git a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp index 15e08b9c..bb5930b2 100644 --- a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp @@ -182,7 +182,7 @@ class FileSystemOperations { auto handle = READ_FILE_HANDLE_PARAM(); auto size = OPCODE_READ_PARAM_INT(); - auto destination = OPCODE_READ_PARAM_OUTPUT_VAR(); + auto destination = OPCODE_READ_PARAM_OUTPUT_VAR_ANY32(); if (size < 0) { @@ -372,7 +372,7 @@ class FileSystemOperations { auto handle = READ_FILE_HANDLE_PARAM(); OPCODE_READ_PARAM_STRING(format); - auto result = OPCODE_READ_PARAM_OUTPUT_VAR(); + auto result = OPCODE_READ_PARAM_OUTPUT_VAR_ANY32(); size_t paramCount = 0; SCRIPT_VAR* outputParams[35]; diff --git a/cleo_plugins/IntOperations/IntOperations.cpp b/cleo_plugins/IntOperations/IntOperations.cpp index 46758529..de0ce001 100644 --- a/cleo_plugins/IntOperations/IntOperations.cpp +++ b/cleo_plugins/IntOperations/IntOperations.cpp @@ -204,7 +204,7 @@ class IntOperations auto operand = OPCODE_READ_PARAM_OUTPUT_VAR_INT(); auto value = OPCODE_READ_PARAM_INT(); - operand->dwParam &= value; + *operand &= value; return OR_CONTINUE; } @@ -217,7 +217,7 @@ class IntOperations auto operand = OPCODE_READ_PARAM_OUTPUT_VAR_INT(); auto value = OPCODE_READ_PARAM_INT(); - operand->dwParam |= value; + *operand |= value; return OR_CONTINUE; } @@ -230,7 +230,7 @@ class IntOperations auto operand = OPCODE_READ_PARAM_OUTPUT_VAR_INT(); auto value = OPCODE_READ_PARAM_INT(); - operand->dwParam ^= value; + *operand ^= value; return OR_CONTINUE; } @@ -242,7 +242,7 @@ class IntOperations { auto 
operand = OPCODE_READ_PARAM_OUTPUT_VAR_INT(); - operand->dwParam = ~operand->dwParam; + *operand = ~*operand; return OR_CONTINUE; } @@ -255,7 +255,7 @@ class IntOperations auto operand = OPCODE_READ_PARAM_OUTPUT_VAR_INT(); auto value = OPCODE_READ_PARAM_INT(); - operand->dwParam %= value; + *operand %= value; return OR_CONTINUE; } @@ -268,7 +268,7 @@ class IntOperations auto operand = OPCODE_READ_PARAM_OUTPUT_VAR_INT(); auto value = OPCODE_READ_PARAM_INT(); - operand->dwParam >>= value; + *operand >>= value; return OR_CONTINUE; } @@ -281,7 +281,7 @@ class IntOperations auto operand = OPCODE_READ_PARAM_OUTPUT_VAR_INT(); auto value = OPCODE_READ_PARAM_INT(); - operand->dwParam <<= value; + *operand <<= value; return OR_CONTINUE; } @@ -301,11 +301,11 @@ class IntOperations } size_t offset = size * 8 - 1; // bit offset of top most bit in source value - bool signBit = operand->dwParam & (1 << offset); + bool signBit = *operand & (1 << offset); if(signBit) { - operand->dwParam |= 0xFFFFFFFF << offset; // set all upper bits + *operand |= 0xFFFFFFFF << offset; // set all upper bits } return OR_CONTINUE; diff --git a/cleo_sdk/CLEO_Utils.h b/cleo_sdk/CLEO_Utils.h index 79e36b0e..d5751152 100644 --- a/cleo_sdk/CLEO_Utils.h +++ b/cleo_sdk/CLEO_Utils.h @@ -16,8 +16,8 @@ namespace CLEO LOG_WARNING(script, format, ...) // warning text on screen and in log file. Not displayed for scripts in 'legacy' mode SHOW_ERROR(a,...) // message box, log to file - Macros to use inside opcode handler functions. Include types validation, printing warnings and suspending script on critical errors. - Please mind those might expand into multiple lines, so should not, for example, be used as body of 'if' statements without brackets! + Macros to use inside opcode handler functions. Performs types validation, printing warnings and suspending script on critical errors. + Please mind those might expand into multiple lines, so should, for example, not be used as body of 'if' statements without brackets! 
OPCODE_CONDITION_RESULT(value) // set result OPCODE_SKIP_PARAMS(count) // ignore X params @@ -31,7 +31,7 @@ namespace CLEO OPCODE_READ_PARAM_INT() OPCODE_READ_PARAM_UINT() OPCODE_READ_PARAM_FLOAT() - OPCODE_READ_PARAM_ANY32() // get raw data of simple-type value (practically integers and floats) + OPCODE_READ_PARAM_ANY32() // get raw data of any simple-type value (practically integers and floats) OPCODE_READ_PARAM_STRING(varName) // reads param and creates const char* variable named 'varName' with pointer to null-terminated string OPCODE_READ_PARAM_STRING_LEN(varName, maxLength) // same as above, but text length is clamped to maxLength OPCODE_READ_PARAM_FILEPATH(varName) // reads param and creates const char* variable named 'varName' with pointer to resolved, null-terminated, filepath @@ -39,9 +39,10 @@ namespace CLEO OPCODE_READ_PARAM_OBJECT_HANDLE() // read and validate game object handle OPCODE_READ_PARAM_PED_HANDLE() // read and validate character (ped/actor) handle OPCODE_READ_PARAM_VEHICLE_HANDLE() // read and validate vehicle handle - OPCODE_READ_PARAM_OUTPUT_VAR() // store variable param pointer to write result later - OPCODE_READ_PARAM_OUTPUT_VAR_INT() // pointer to write integer result later - OPCODE_READ_PARAM_OUTPUT_VAR_FLOAT() // pointer to write float result later + // for opcodes with mixed params order, where 'strore_to' occurs before input arguments + OPCODE_READ_PARAM_OUTPUT_VAR_INT() // get pointer to integer variable param to write result later + OPCODE_READ_PARAM_OUTPUT_VAR_FLOAT() // get pointer to float variable param to write result later + OPCODE_READ_PARAM_OUTPUT_VAR_ANY32() // get pointer to simple-type variable param to write result later // writing opcode output/result data OPCODE_WRITE_PARAM_BOOL(value) @@ -445,15 +446,15 @@ namespace CLEO if (!_paramWasInt()) { SHOW_ERROR("Input argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, 
_lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ else if (!IsVehicleHandleValid(_paramsArray[0].dwParam)) { SHOW_ERROR("Invalid vehicle handle '0x%X' input argument #%d in script %s \nScript suspended.", _paramsArray[0].dwParam, CLEO_GetParamsHandledCount(), ScriptInfoStr(thread).c_str()); return thread->Suspend(); } - #define OPCODE_READ_PARAM_OUTPUT_VAR() _readParamVariable(thread); \ - if (!_paramWasVariable()) { SHOW_ERROR("Output argument #%d expected to be variable, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + #define OPCODE_READ_PARAM_OUTPUT_VAR_ANY32() _readParamVariable(thread); \ + if (!_paramWasVariable()) { SHOW_ERROR("Output argument #%d expected to be variable int or float, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } - #define OPCODE_READ_PARAM_OUTPUT_VAR_INT() _readParamVariable(thread); \ - if (!_paramWasVariable()) { SHOW_ERROR("Output argument #%d expected to be variable, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ + #define OPCODE_READ_PARAM_OUTPUT_VAR_INT() (int*)_readParamVariable(thread); \ + if (!_paramWasVariable()) { SHOW_ERROR("Output argument #%d expected to be variable int, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be variable int, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, 
_lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } - #define OPCODE_READ_PARAM_OUTPUT_VAR_FLOAT() _readParamVariable(thread); \ - if (!_paramWasVariable()) { SHOW_ERROR("Output argument #%d expected to be variable, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ + #define OPCODE_READ_PARAM_OUTPUT_VAR_FLOAT() (float*)_readParamVariable(thread); \ + if (!_paramWasVariable()) { SHOW_ERROR("Output argument #%d expected to be variable float, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ if (!IsLegacyScript(thread) && !_paramWasFloat(true)) { SHOW_ERROR("Output argument #%d expected to be variable float, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } // macros for writing opcode output params. Performs type validation, throws error and suspends script if user provided invalid argument type diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 1e309daa..1c1cff1f 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -134,6 +134,7 @@ namespace CLEO if (buff == nullptr || buffLen < 0) { LOG_WARNING(0, "Invalid ReadStringParam input argument! Ptr: 0x%08X, Size: %d", buff, buffLen); + CLEO_SkipOpcodeParams(thread, 1); return nullptr; } @@ -159,6 +160,7 @@ namespace CLEO } else if (paramType == DT_VARLEN_STRING) { + GetInstance().OpcodeSystem.handledParamCount++; thread->IncPtr(1); // already processed paramType DWORD length = *thread->GetBytePointer(); // as unsigned byte! 
@@ -180,6 +182,7 @@ namespace CLEO { case DT_TEXTLABEL: { + GetInstance().OpcodeSystem.handledParamCount++; memcpy(buff, str, min(buffLen, 8)); thread->IncPtr(8); // text data return buff; @@ -187,6 +190,7 @@ namespace CLEO case DT_STRING: { + GetInstance().OpcodeSystem.handledParamCount++; memcpy(buff, str, min(buffLen, 16)); thread->IncPtr(16); // ext data return buff; @@ -225,10 +229,17 @@ namespace CLEO // unsupported param type LOG_WARNING(thread, "Argument #%d expected to be string, got %s in script %s", CLEO_GetParamsHandledCount(), ToKindStr(paramType, arrayType), ScriptInfoStr(thread).c_str()); - GetScriptParams(thread, 1); // try skip unhandled param + CLEO_SkipOpcodeParams(thread, 1); // try skip unhandled param return nullptr; // error } + SCRIPT_VAR* GetScriptParamPointer(CRunningScript* thread) + { + SCRIPT_VAR* ptr = GetScriptParamPointer2(thread, 0); + GetInstance().OpcodeSystem.handledParamCount++; // TODO: hook game's GetScriptParamPointer1 and GetScriptParamPointer2 procedures so this is always incremented + return ptr; + } + SCRIPT_VAR * __fastcall _GetScriptParamPointer2(CRunningScript *pScript, int dummy, int unused) { _asm diff --git a/source/CScriptEngine.h b/source/CScriptEngine.h index fb73e70b..07efc805 100644 --- a/source/CScriptEngine.h +++ b/source/CScriptEngine.h @@ -161,11 +161,7 @@ namespace CLEO // WARNING: Null terminator ommited if not enought space in the buffer! const char* __fastcall GetScriptStringParam(CRunningScript* thread, int dummy, char* buff, int buffLen); - inline SCRIPT_VAR * GetScriptParamPointer(CRunningScript *thread) - { - SCRIPT_VAR* ptr = GetScriptParamPointer2(thread, 0); - return ptr; - } + inline SCRIPT_VAR* GetScriptParamPointer(CRunningScript* thread); extern "C" { extern CRunningScript *staticThreads; From edbed65a61f34d8f36d69c7773095ec6f4d39456 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Tue, 5 Mar 2024 04:54:17 +0100 Subject: [PATCH 115/216] File related opcodes fixes. More tests. 
(#88) * Fixes in file operations plugin. New opcodes and unit tests. * fixup! Fixes in file operations plugin. New opcodes and unit tests. * fixup! Fixes in file operations plugin. New opcodes and unit tests. * fixup! Fixes in file operations plugin. New opcodes and unit tests. * File mode handling updated. --- CHANGELOG.md | 7 +- .../FileSystemOperations.cpp | 69 ++--- .../FileSystemOperations.vcxproj | 2 + .../FileSystemOperations.vcxproj.filters | 11 + .../FileSystemOperations/FileUtils.cpp | 46 ++++ .../MemoryOperations/MemoryOperations.cpp | 25 ++ .../cleo_tests/FilesystemOperations/0A9E.txt | 234 ++++++++++++++++ .../cleo_tests/FilesystemOperations/0AD7.txt | 252 ++++++++++++++++++ .../cleo_tests/FilesystemOperations/2301.txt | 179 +++++++++++++ .../cleo_tests/FilesystemOperations/2302.txt | 240 +++++++++++++++++ tests/cleo_tests/MemoryOperations/2407.txt | 164 ++++++++++++ 11 files changed, 1197 insertions(+), 32 deletions(-) create mode 100644 tests/cleo_tests/FilesystemOperations/0A9E.txt create mode 100644 tests/cleo_tests/FilesystemOperations/0AD7.txt create mode 100644 tests/cleo_tests/FilesystemOperations/2301.txt create mode 100644 tests/cleo_tests/FilesystemOperations/2302.txt create mode 100644 tests/cleo_tests/MemoryOperations/2407.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index ba167ef7..09368e45 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,8 +30,9 @@ - added/fixed support of all file stream opcodes in legacy mode (Cleo3) - new opcode **2300 ([get_file_position](https://library.sannybuilder.com/#/sa/file/2300))** - new opcode **2301 ([read_block_from_file](https://library.sannybuilder.com/#/sa/file/2301))** - - **2302 ([resolve_filepath](https://library.sannybuilder.com/#/sa/file/2302))** - - **2303 ([get_script_filename](https://library.sannybuilder.com/#/sa/file/2303))** + - new opcode **2302 ([write_block_to_file](https://library.sannybuilder.com/#/sa/file/2302))** + - new opcode **2303 
([resolve_filepath](https://library.sannybuilder.com/#/sa/file/2303))** + - new opcode **2304 ([get_script_filename](https://library.sannybuilder.com/#/sa/file/2304))** - new [MemoryOperations](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/MemoryOperations) plugin - memory related opcodes moved from CLEO core into separated plugin - validation of input and output parameters for all opcodes @@ -43,6 +44,7 @@ - new opcode **2404 ([get_script_struct_just_created](https://library.sannybuilder.com/#/sa/memory/2404))** - new opcode **2405 ([is_script_running](https://library.sannybuilder.com/#/sa/memory/2405))** - new opcode **2406 ([get_script_struct_from_filename](https://library.sannybuilder.com/#/sa/memory/2406))** + - new opcode **2407 ([is_memory_equal](https://library.sannybuilder.com/#/sa/memory/2407))** - new and updated opcodes - **0B1E ([sign_extend](https://library.sannybuilder.com/#/sa/bitwise/0B1E))** - **0DD5 ([get_game_platform](https://library.sannybuilder.com/#/sa/CLEO/0DD5))** @@ -102,6 +104,7 @@ #### CLEO internal +- introduced unit test scripts - project migrated to VS 2022 - configured game debugging settings - plugins moved into single solution diff --git a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp index bb5930b2..a8a98217 100644 --- a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp @@ -73,8 +73,9 @@ class FileSystemOperations CLEO_RegisterOpcode(0x2300, opcode_2300); // get_file_position CLEO_RegisterOpcode(0x2301, opcode_2301); // read_block_from_file - CLEO_RegisterOpcode(0x2302, opcode_2302); // resolve_filepath - CLEO_RegisterOpcode(0x2303, opcode_2303); // get_script_filename + CLEO_RegisterOpcode(0x2302, opcode_2302); // write_block_to_file + CLEO_RegisterOpcode(0x2303, opcode_2303); // resolve_filepath + CLEO_RegisterOpcode(0x2304, opcode_2304); // get_script_filename // 
register event callbacks CLEO_RegisterCallback(eCallbackId::ScriptsFinalize, OnFinalizeScriptObjects); @@ -209,6 +210,7 @@ class FileSystemOperations if (size == 0) { + OPCODE_SKIP_PARAMS(1); // from return OR_CONTINUE; // done } @@ -298,37 +300,20 @@ class FileSystemOperations if (size < 0) { - auto info = ScriptInfoStr(thread); - SHOW_ERROR("Invalid size argument (%d) in script %s\nScript suspended.", size, info.c_str()); + SHOW_ERROR("Invalid size argument (%d) in script %s\nScript suspended.", size, ScriptInfoStr(thread).c_str()); return thread->Suspend(); } if (size == 0) { - if (bufferSize > 0) buffer[0] = '\0'; - OPCODE_CONDITION_RESULT(false); - return OR_CONTINUE; - } - - std::vector tmpBuff; - tmpBuff.resize(size); - auto data = tmpBuff.data(); - - bool ok = File::readString(handle, data, size) != nullptr; - if(!ok) - { - OPCODE_CONDITION_RESULT(false); + OPCODE_CONDITION_RESULT(true); return OR_CONTINUE; } - // copy into result param - int len = strlen(data); - int resultSize = min(len, bufferSize - (int)needsTerminator); - - memcpy(buffer, data, resultSize); - if(resultSize < bufferSize) buffer[resultSize] = '\0'; // terminate string whenever possible + // use caller's size argument, ignoring actual target type size. Intended for legacy reasons. 
+ bool ok = File::readString(handle, buffer, size) != nullptr; - OPCODE_CONDITION_RESULT(true); + OPCODE_CONDITION_RESULT(ok); return OR_CONTINUE; } @@ -686,8 +671,7 @@ class FileSystemOperations if (size < 0) { - auto info = ScriptInfoStr(thread); - SHOW_ERROR("Invalid size argument (%d) in script %s\nScript suspended.", size, info.c_str()); + SHOW_ERROR("Invalid size argument (%d) in script %s\nScript suspended.", size, ScriptInfoStr(thread).c_str()); return thread->Suspend(); } @@ -708,8 +692,33 @@ class FileSystemOperations return OR_CONTINUE; } - //2302=2,%2s% = resolve_filepath %1s% - static OpcodeResult __stdcall opcode_2302(CRunningScript* thread) + //2302=3,write_block_to_file %1d% size %2d% address %3d% // IF and SET + static OpcodeResult WINAPI opcode_2302(CRunningScript* thread) + { + auto handle = READ_FILE_HANDLE_PARAM(); + auto size = OPCODE_READ_PARAM_INT(); + auto source = OPCODE_READ_PARAM_PTR(); + + if (size < 0) + { + SHOW_ERROR("Invalid size argument (%d) in script %s\nScript suspended.", size, ScriptInfoStr(thread).c_str()); + return thread->Suspend(); + } + + if (size == 0) + { + OPCODE_CONDITION_RESULT(true); + return OR_CONTINUE; + } + + auto readCount = File::write(handle, source, size); + + OPCODE_CONDITION_RESULT(readCount == size); + return OR_CONTINUE; + } + + //2303=2,%2s% = resolve_filepath %1s% + static OpcodeResult __stdcall opcode_2303(CRunningScript* thread) { OPCODE_READ_PARAM_FILEPATH(path); // it also resolves the path to absolute form @@ -717,8 +726,8 @@ class FileSystemOperations return OR_CONTINUE; } - //2303=3,%3s% = get_script_filename %1d% full_path %2d% // IF and SET - static OpcodeResult __stdcall opcode_2303(CRunningScript* thread) + //2304=3,%3s% = get_script_filename %1d% full_path %2d% // IF and SET + static OpcodeResult __stdcall opcode_2304(CRunningScript* thread) { auto script = OPCODE_READ_PARAM_INT(); auto fullPath = OPCODE_READ_PARAM_BOOL(); diff --git 
a/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj b/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj index 742cef3a..035bcfa0 100644 --- a/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj @@ -118,6 +118,8 @@ if defined GTA_SA_DIR ( + + diff --git a/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj.filters b/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj.filters index 6e505e7d..33fef5b0 100644 --- a/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj.filters +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj.filters @@ -7,5 +7,16 @@ + + cleo_sdk + + + cleo_sdk + + + + + {a2c39c52-f49e-4ffe-bb0a-661ab07131b9} + \ No newline at end of file diff --git a/cleo_plugins/FileSystemOperations/FileUtils.cpp b/cleo_plugins/FileSystemOperations/FileUtils.cpp index bb698681..270fc130 100644 --- a/cleo_plugins/FileSystemOperations/FileUtils.cpp +++ b/cleo_plugins/FileSystemOperations/FileUtils.cpp @@ -1,4 +1,5 @@ #include "FileUtils.h" +#include "CLEO_Utils.h" #include DWORD File::FUNC_fopen = 0; @@ -84,6 +85,51 @@ bool File::flush(DWORD handle) DWORD File::open(const char* filename, const char* mode, bool legacy) { + // validate the mode argument + if (!legacy) + { + static char modeUpdated[12]; + const std::string allowed = "+abcnrtwxDRST"; // https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/fopen-wfopen?view=msvc-170 + + bool valid = false; + bool binary = false; + bool text = false; + auto modeLen = mode != nullptr ? 
strlen(mode) : 0; + if (modeLen > 0 && modeLen < (sizeof(modeUpdated) - 1)) // keep space for extra binary mode char + { + valid = true; + + for (auto ch : std::string_view(mode)) + { + if (allowed.find(ch) == std::string_view::npos) + { + valid = false; + break; // invalid character + } + + if (ch == 'b') binary = true; + if (ch == 't') text = true; + } + + if (binary && text) valid = false; + + // By default open as binary mode. + // Generally text mode is not well documented in C and many file related functions has undefined behavior. For example 'ftell' returns invalid values. + if (valid && !binary) + { + strcpy(modeUpdated, mode); + strcat(modeUpdated, "b"); + mode = modeUpdated; + } + } + + if (!valid) + { + LOG_WARNING(0, "Invalid mode argument '%s' while opening file \"%s\" stream!", mode, filename); + return 0; // invalid handle + } + } + FILE* file = nullptr; if (legacy) { diff --git a/cleo_plugins/MemoryOperations/MemoryOperations.cpp b/cleo_plugins/MemoryOperations/MemoryOperations.cpp index bd3f2143..8220dcf7 100644 --- a/cleo_plugins/MemoryOperations/MemoryOperations.cpp +++ b/cleo_plugins/MemoryOperations/MemoryOperations.cpp @@ -60,6 +60,7 @@ class MemoryOperations CLEO_RegisterOpcode(0x2404, opcode_2404); // get_script_struct_just_created CLEO_RegisterOpcode(0x2405, opcode_2405); // is_script_running CLEO_RegisterOpcode(0x2406, opcode_2406); // get_script_struct_from_filename + CLEO_RegisterOpcode(0x2407, opcode_2407); // is_memory_equal // register event callbacks @@ -788,6 +789,30 @@ class MemoryOperations OPCODE_CONDITION_RESULT(address != nullptr); return OR_CONTINUE; } + + //2407=3, is_memory_equal address_a %1d% address_b %2d% size %d3% + static OpcodeResult __stdcall opcode_2407(CLEO::CScriptThread* thread) + { + auto addressA = OPCODE_READ_PARAM_PTR(); + auto addressB = OPCODE_READ_PARAM_PTR(); + auto size = OPCODE_READ_PARAM_INT(); + + if (size == 0) + { + OPCODE_CONDITION_RESULT(true); + return OR_CONTINUE; + } + if (size < 0) + { + 
SHOW_ERROR("Invalid '%d' size argument in script %s\nScript suspended.", size, ScriptInfoStr(thread).c_str()); + return thread->Suspend(); + } + + auto result = memcmp(addressA, addressB, size); + + OPCODE_CONDITION_RESULT(result == 0); + return OR_CONTINUE; + } } Memory; std::set MemoryOperations::m_allocations; diff --git a/tests/cleo_tests/FilesystemOperations/0A9E.txt b/tests/cleo_tests/FilesystemOperations/0A9E.txt new file mode 100644 index 00000000..da4790cd --- /dev/null +++ b/tests/cleo_tests/FilesystemOperations/0A9E.txt @@ -0,0 +1,234 @@ +{$CLEO .s} +{$USE debug} +{$USE file} +{$USE bitwise} +var 0@ : Integer +var 1@ : Integer +var 2@ : Integer +var 3@ : Integer +var 4@ : Integer +var 5@ : Integer +var 6@ : Integer +var 7@ : Integer +var 8@ : Integer +var 9@ : Integer +var 10@ : Integer + + +script_name "0A9E" // write_to_file +debug_on + +trace "0A9E (write_to_file)" + +const Test_Filename = "cleo:\cleo_tests\test_file.dat" + + +// delete old test file if presesnt +wait 0 +if + does_file_exist Test_Filename +then + if + not delete_file Test_Filename + then + breakpoint "~r~~h~~h~~h~0A9E (write_to_file), Failed to delete previous test's file '%s'!" Test_Filename + end +end + + +// create new file +wait 0 +if + 0@ = open_file Test_Filename {mode} "w" +then + trace "~g~~h~~h~0A9E (write_to_file), #1 PASSED" +else + breakpoint "~r~~h~~h~~h~0A9E (write_to_file), #1 FAILED! Failed to create '%s' file." 
Test_Filename +end + + +// test file size +wait 0 +1@ = get_file_size 0@ +if + 1@ == 0 +then + trace "~g~~h~~h~0A9E (write_to_file), #2 PASSED" +else + breakpoint "~r~~h~~h~~h~0A9E (write_to_file), #2 FAILED!~n~0 Expected~n~%d Occured" 1@ +end + + +// write 0 bytes +wait 0 +2@ = 0x33221100 +3@ = 0x77665544 +4@ = 0xBBAA9988 +5@ = 0xFFEECCDD +0A9E: write_to_file 0@ {size} 0 {source} 3@ // tested opcode +1@ = get_file_size 0@ +if + 1@ == 0 +then + trace "~g~~h~~h~0A9E (write_to_file), #3 PASSED" +else + breakpoint "~r~~h~~h~~h~0A9E (write_to_file), #3 FAILED!~n~%d Expected~n~%d Occured" 0 1@ +end + + +// write 1 byte +wait 0 +2@ = 0x33221100 +3@ = 0x77665544 +4@ = 0xBBAA9988 +5@ = 0xFFEECCDD +0A9E: write_to_file 0@ {size} 1 {source} 3@ // tested opcode +1@ = get_file_size 0@ +if + 1@ == 1 +then + trace "~g~~h~~h~0A9E (write_to_file), #4 PASSED" +else + breakpoint "~r~~h~~h~~h~0A9E (write_to_file), #4 FAILED!~n~%d Expected~n~%d Occured" 1 1@ +end + + +// write 2 bytes +wait 0 +2@ = 0x33221100 +3@ = 0x77665544 +4@ = 0xBBAA9988 +5@ = 0xFFEECCDD +0A9E: write_to_file 0@ {size} 2 {source} 3@ // tested opcode +1@ = get_file_size 0@ +if + 1@ == 3 +then + trace "~g~~h~~h~0A9E (write_to_file), #5 PASSED" +else + breakpoint "~r~~h~~h~~h~0A9E (write_to_file), #5 FAILED!~n~%d Expected~n~%d Occured" 3 1@ +end + + +// write 3 bytes +wait 0 +2@ = 0x33221100 +3@ = 0x77665544 +4@ = 0xBBAA9988 +5@ = 0xFFEECCDD +0A9E: write_to_file 0@ {size} 3 {source} 3@ // tested opcode +1@ = get_file_size 0@ +if + 1@ == 6 +then + trace "~g~~h~~h~0A9E (write_to_file), #6 PASSED" +else + breakpoint "~r~~h~~h~~h~0A9E (write_to_file), #6 FAILED!~n~%d Expected~n~%d Occured" 6 1@ +end + + +// write 4 bytes +wait 0 +2@ = 0x33221100 +3@ = 0x77665544 +4@ = 0xBBAA9988 +5@ = 0xFFEECCDD +0A9E: write_to_file 0@ {size} 4 {source} 3@ // tested opcode +1@ = get_file_size 0@ +if + 1@ == 10 +then + trace "~g~~h~~h~0A9E (write_to_file), #7 PASSED" +else + breakpoint "~r~~h~~h~~h~0A9E (write_to_file), #7 FAILED!~n~%d 
Expected~n~%d Occured" 10 1@ +end + + +// write 5 bytes +wait 0 +2@ = 0x33221100 +3@ = 0x77665544 +4@ = 0xBBAA9988 +5@ = 0xFFEECCDD +0A9E: write_to_file 0@ {size} 5 {source} 3@ // tested opcode +1@ = get_file_size 0@ +if + 1@ == 15 +then + trace "~g~~h~~h~0A9E (write_to_file), #8 PASSED" +else + breakpoint "~r~~h~~h~~h~0A9E (write_to_file), #8 FAILED!~n~%d Expected~n~%d Occured" 15 1@ +end + + +// write 9 bytes +wait 0 +2@ = 0x33221100 +3@ = 0x77665544 +4@ = 0xBBAA9988 +5@ = 0xFFEECCDD +0A9E: write_to_file 0@ {size} 9 {source} 3@ // tested opcode +1@ = get_file_size 0@ +if + 1@ == 24 +then + trace "~g~~h~~h~0A9E (write_to_file), #9 PASSED" +else + breakpoint "~r~~h~~h~~h~0A9E (write_to_file), #9 FAILED!~n~%d Expected~n~%d Occured" 24 1@ +end + + +// close file +wait 0 +close_file 0@ +trace "~g~~h~~h~0A9E (write_to_file), #10 PASSED" + + +// reopen to read +wait 0 +if + 0@ = open_file Test_Filename {mode} "r" +then + trace "~g~~h~~h~0A9E (write_to_file), #11 PASSED" +else + breakpoint "~r~~h~~h~~h~0A9E (write_to_file), #11 FAILED! Failed to open '%s' file." Test_Filename +end + + +// read and verify the data +wait 0 +2@ = 0 +3@ = 0 +4@ = 0 +5@ = 0 +read_from_file 0@ {size} 24 {destination} 2@ +if and + 2@ == 0x44554444 + 3@ == 0x55446655 + 4@ == 0x55447766 + 5@ == 0x44887766 +then + trace "~g~~h~~h~0A9E (write_to_file), #12 PASSED" +else + breakpoint "~r~~h~~h~~h~0A9E (write_to_file), #12 FAILED!~n~%08x %08x %08x %08x Expected~n~%08x %08x %08x %08x Occured" 0x44554444 0x55446655 0x55447766 0x44887766 2@ 3@ 4@ 5@ +end + + +// close file +wait 0 +close_file 0@ +trace "~g~~h~~h~0A9E (write_to_file), #13 PASSED" + + +// delete file +wait 0 +if + delete_file Test_Filename +then + trace "~g~~h~~h~0A9E (write_to_file), #14 PASSED" +else + breakpoint "~r~~h~~h~~h~0A9E (write_to_file), #14 FAILED! Failed to delete '%s' file."
Test_Filename +end + + +terminate_this_custom_script diff --git a/tests/cleo_tests/FilesystemOperations/0AD7.txt b/tests/cleo_tests/FilesystemOperations/0AD7.txt new file mode 100644 index 00000000..91151154 --- /dev/null +++ b/tests/cleo_tests/FilesystemOperations/0AD7.txt @@ -0,0 +1,252 @@ +{$CLEO .s} +{$USE debug} +{$USE file} +{$USE bitwise} +var 0@ : Integer +var 1@ : Integer +var 2@ : Integer +var 3@ : Integer +var 4@ : Integer +var 5@ : Integer +var 6@ : Integer +var 7@ : Integer +var 8@ : Integer +var 9@ : Integer +var 10@ : Integer + +goto @DATA_END +hex + "t1" 0A "t2" 0D 0A "t3" 09 "e s" 0A "very long test no.4" 0A "even longer test string to read number 5" 0A +end +:DATA_END + + +script_name "0AD7" // read_string_from_file +debug_on +trace "0AD7 (read_string_from_file)" + + +// open the file +wait 0 +if + 0@ = open_file ".\0AD7.s" {mode} "r" +then + trace "~g~~h~~h~0AD7 (read_string_from_file), #0 PASSED" +else + breakpoint "~r~~h~~h~~h~0AD7 (read_string_from_file), #0 FAILED! Failed to open file." +end + + +// seek file to hex data block +wait 0 +file_seek 0@ {offset} 7 {origin} SeekOrigin.Begin +trace "~g~~h~~h~0AD7 (read_string_from_file), #1 PASSED" + + +// read 0 +wait 0 +1@ = 0xcccccccc +2@ = 0xcccccccc +3@ = 0xcccccccc +if + 0AD7: read_string_from_file 0@ {store_to} 2@s {max_lenght} 0 // tested opcode +then + trace "~g~~h~~h~0AD7 (read_string_from_file), #2 PASSED" +else + breakpoint "~r~~h~~h~~h~0AD7 (read_string_from_file), #2 FAILED! 
Condition result: FALSE" +end +if and + 1@ == 0xcccccccc + 2@ == 0xcccccccc + 3@ == 0xcccccccc +then + trace "~g~~h~~h~0AD7 (read_string_from_file), #3 PASSED" +else + breakpoint "~r~~h~~h~~h~0AD7 (read_string_from_file), #3 FAILED!~n~%08x %08x %08x Expected~n~%08x %08x %08x Occured" 0xcccccccc 0xcccccccc 0xcccccccc 1@ 2@ 3@ +end + + +// read 1 +wait 0 +1@ = 0xcccccccc +2@ = 0xcccccccc +3@ = 0xcccccccc +4@ = 0xcccccccc +if + 0AD7: read_string_from_file 0@ {store_to} 2@s {max_lenght} 1 // tested opcode +then + trace "~g~~h~~h~0AD7 (read_string_from_file), #4 PASSED" +else + breakpoint "~r~~h~~h~~h~0AD7 (read_string_from_file), #4 FAILED! Condition result: FALSE" +end +if and + 1@ == 0xcccccccc + 2@s == '' + 4@ == 0xcccccccc +then + trace "~g~~h~~h~0AD7 (read_string_from_file), #5 PASSED" +else + breakpoint "~r~~h~~h~~h~0AD7 (read_string_from_file), #5 FAILED!~n~%08x '%s' %08x Expected~n~%08x '%s' %08x Occured" 0xcccccccc '' 0xcccccccc 1@ 2@s 4@ +end + + +// read ended by new line +wait 0 +1@ = 0xcccccccc +2@ = 0xcccccccc +3@ = 0xcccccccc +4@ = 0xcccccccc +if + 0AD7: read_string_from_file 0@ {store_to} 2@s {max_lenght} 8 // tested opcode +then + trace "~g~~h~~h~0AD7 (read_string_from_file), #6 PASSED" +else + breakpoint "~r~~h~~h~~h~0AD7 (read_string_from_file), #6 FAILED! 
Condition result: FALSE" +end +string_format 6@s {format} "t1%c" {args} 0x0A // ended with new line +if and + 1@ == 0xcccccccc + 2@s == 6@s + 4@ == 0xcccccccc +then + trace "~g~~h~~h~0AD7 (read_string_from_file), #7 PASSED" +else + breakpoint "~r~~h~~h~~h~0AD7 (read_string_from_file), #7 FAILED!~n~%08x '%s' %08x Expected~n~%08x '%s' %08x Occured" 0xcccccccc 6@s 0xcccccccc 1@ 2@s 4@ +end + + +// read ended by new line (Windows) +wait 0 +1@ = 0xcccccccc +2@ = 0xcccccccc +3@ = 0xcccccccc +4@ = 0xcccccccc +if + 0AD7: read_string_from_file 0@ {store_to} 2@s {max_lenght} 8 // tested opcode +then + trace "~g~~h~~h~0AD7 (read_string_from_file), #8 PASSED" +else + breakpoint "~r~~h~~h~~h~0AD7 (read_string_from_file), #8 FAILED! Condition result: FALSE" +end +string_format 6@s {format} "t2%c%c" {args} 0x0D 0x0A // ended with new line +if and + 1@ == 0xcccccccc + 2@s == 6@s + 4@ == 0xcccccccc +then + trace "~g~~h~~h~0AD7 (read_string_from_file), #9 PASSED" +else + breakpoint "~r~~h~~h~~h~0AD7 (read_string_from_file), #9 FAILED!~n~%08x '%s' %08x Expected~n~%08x '%s' %08x Occured" 0xcccccccc 6@s 0xcccccccc 1@ 2@s 4@ +end + + +// read not split by tab or space +wait 0 +1@ = 0xcccccccc +2@ = 0xcccccccc +3@ = 0xcccccccc +4@ = 0xcccccccc +if + 0AD7: read_string_from_file 0@ {store_to} 2@s {max_lenght} 8 // tested opcode +then + trace "~g~~h~~h~0AD7 (read_string_from_file), #10 PASSED" +else + breakpoint "~r~~h~~h~~h~0AD7 (read_string_from_file), #10 FAILED!
Condition result: FALSE" +end +string_format 6@s {format} "t3%ce s%c" {args} 0x09 0x0A +if and + 1@ == 0xcccccccc + 2@s == 6@s + 4@ == 0xcccccccc +then + trace "~g~~h~~h~0AD7 (read_string_from_file), #11 PASSED" +else + breakpoint "~r~~h~~h~~h~0AD7 (read_string_from_file), #11 FAILED!~n~%08x '%s' %08x Expected~n~%08x '%s' %08x Occured" 0xcccccccc 6@s 0xcccccccc 1@ 2@s 4@ +end + + +// read longer than longString variable (expect overflow) +wait 0 +1@ = 0xcccccccc +2@ = 0xcccccccc +3@ = 0xcccccccc +4@ = 0xcccccccc +5@ = 0xcccccccc +6@ = 0xcccccccc +7@ = 0xcccccccc +8@ = 0xcccccccc +if + 0AD7: read_string_from_file 0@ {store_to} 2@s {max_lenght} 32 // tested opcode +then + trace "~g~~h~~h~0AD7 (read_string_from_file), #12 PASSED" +else + breakpoint "~r~~h~~h~~h~0AD7 (read_string_from_file), #12 FAILED! Condition result: FALSE" +end +9@ = allocate_memory 64 +string_format 9@ {format} "very long test no.4%c" {args} 0x0A +10@ = get_var_pointer 2@ +if and + 1@ == 0xcccccccc + is_memory_equal 9@ 10@ {size} 21 // including terminator + 8@ == 0xcccccccc +then + trace "~g~~h~~h~0AD7 (read_string_from_file), #13 PASSED" +else + breakpoint "~r~~h~~h~~h~0AD7 (read_string_from_file), #13 FAILED!~n~%08x '%s' %08x Expected~n~%08x '%s' %08x Occured" 0xcccccccc 9@ 0xcccccccc 1@ 10@ 8@ +end +free_memory 9@ + + +// read limited by max_lenght param +wait 0 +1@ = 0xcccccccc +2@ = 0xcccccccc +3@ = 0xcccccccc +if + 0AD7: read_string_from_file 0@ {store_to} 2@s {max_lenght} 4 // tested opcode +then + trace "~g~~h~~h~0AD7 (read_string_from_file), #14 PASSED" +else + breakpoint "~r~~h~~h~~h~0AD7 (read_string_from_file), #14 FAILED! 
Condition result: FALSE" +end +if and + 1@ == 0xcccccccc + 2@ == 0x00657665 // "eve\0" + 3@ == 0xcccccccc +then + trace "~g~~h~~h~0AD7 (read_string_from_file), #15 PASSED" +else + breakpoint "~r~~h~~h~~h~0AD7 (read_string_from_file), #15 FAILED!~n~%08x %08x %08x Expected~n~%08x %08x %08x Occured" 0xcccccccc 0x00657665 0xcccccccc 1@ 2@ 3@ +end + + +// read into memory address +wait 0 +1@ = allocate_memory 64 +if + 0AD7: read_string_from_file 0@ {store_to} 1@ {max_lenght} 64 // tested opcode +then + trace "~g~~h~~h~0AD7 (read_string_from_file), #16 PASSED" +else + breakpoint "~r~~h~~h~~h~0AD7 (read_string_from_file), #16 FAILED! Condition result: FALSE" +end +2@ = allocate_memory 64 +string_format 2@ {format} "n longer test string to read number 5%c" {args} 0x0A +if + is_memory_equal 1@ 2@ {size} 39 // including terminator +then + trace "~g~~h~~h~0AD7 (read_string_from_file), #17 PASSED" +else + breakpoint "~r~~h~~h~~h~0AD7 (read_string_from_file), #17 FAILED!~n~'%s' Expected~n~'%s' Occured" 2@ 1@ +end +free_memory 1@ +free_memory 2@ + + +// close the file +wait 0 +close_file 0@ +trace "~g~~h~~h~0AD7 (read_string_from_file), #18 PASSED" + + +terminate_this_custom_script diff --git a/tests/cleo_tests/FilesystemOperations/2301.txt b/tests/cleo_tests/FilesystemOperations/2301.txt new file mode 100644 index 00000000..0627169e --- /dev/null +++ b/tests/cleo_tests/FilesystemOperations/2301.txt @@ -0,0 +1,179 @@ +{$CLEO .s} +{$USE debug} +{$USE file} +{$USE bitwise} +var 0@ : Integer +var 1@ : Integer +var 2@ : Integer +var 3@ : Integer +var 4@ : Integer +var 5@ : Integer +var 6@ : Integer +var 7@ : Integer +var 8@ : Integer +var 9@ : Integer +var 10@ : Integer + +goto @DATA_END +hex + FF EE DD CC BB AA 99 88 77 66 55 44 33 22 11 00 +end +:DATA_END + + +script_name "2301" // read_block_from_file +debug_on + +trace "2301 (read_block_from_file)" + + +// open the file +wait 0 +if + 0@ = open_file ".\2301.s" {mode} "r" +then + trace "~g~~h~~h~2301
(read_block_from_file), #0 PASSED" +else + breakpoint "~r~~h~~h~~h~2301 (read_block_from_file), #0 FAILED! Failed to open file." +end + + +// seek file to hex data block +wait 0 +file_seek 0@ {offset} 7 {origin} SeekOrigin.Begin +trace "~g~~h~~h~2301 (read_block_from_file), #1 PASSED" + + +// read 0 bytes +wait 0 +1@ = 0xcccccccc +2@ = 0xcccccccc +3@ = 0xcccccccc +4@ = 0xcccccccc +5@ = get_var_pointer 2@ +if + 2301: read_block_from_file 0@ {size} 0 {destination} 5@ // tested opcode +then + trace "~g~~h~~h~2301 (read_block_from_file), #2 PASSED" +else + breakpoint "~r~~h~~h~~h~2301 (read_block_from_file), #2 FAILED! Condition result: FALSE" +end +if and + 1@ == 0xcccccccc + 2@ == 0xcccccccc + 3@ == 0xcccccccc + 4@ == 0xcccccccc +then + trace "~g~~h~~h~2301 (read_block_from_file), #3 PASSED" +else + breakpoint "~r~~h~~h~~h~2301 (read_block_from_file), #3 FAILED!~n~%08x %08x %08x %08x Expected~n~%08x %08x %08x %08x Occured" 0xcccccccc 0xcccccccc 0xcccccccc 0xcccccccc 1@ 2@ 3@ 4@ +end + + +// read 1 byte +wait 0 +1@ = 0xcccccccc +2@ = 0xcccccccc +3@ = 0xcccccccc +4@ = 0xcccccccc +5@ = get_var_pointer 2@ +2301: read_block_from_file 0@ {size} 1 {destination} 5@ // tested opcode +if and + 1@ == 0xcccccccc + 2@ == 0xccccccff + 3@ == 0xcccccccc + 4@ == 0xcccccccc +then + trace "~g~~h~~h~2301 (read_block_from_file), #4 PASSED" +else + breakpoint "~r~~h~~h~~h~2301 (read_block_from_file), #4 FAILED!~n~%08x %08x %08x %08x Expected~n~%08x %08x %08x %08x Occured" 0xcccccccc 0xccccccff 0xcccccccc 0xcccccccc 1@ 2@ 3@ 4@ +end + + +// read 2 bytes +wait 0 +1@ = 0xcccccccc +2@ = 0xcccccccc +3@ = 0xcccccccc +4@ = 0xcccccccc +5@ = get_var_pointer 2@ +2301: read_block_from_file 0@ {size} 2 {destination} 5@ // tested opcode +if and + 1@ == 0xcccccccc + 2@ == 0xccccddee + 3@ == 0xcccccccc + 4@ == 0xcccccccc +then + trace "~g~~h~~h~2301 (read_block_from_file), #5 PASSED" +else + breakpoint "~r~~h~~h~~h~2301 (read_block_from_file), #5 FAILED!~n~%08x %08x %08x %08x Expected~n~%08x %08x %08x 
%08x Occured" 0xcccccccc 0xccccddee 0xcccccccc 0xcccccccc 1@ 2@ 3@ 4@ +end + + +// read 3 bytes +wait 0 +1@ = 0xcccccccc +2@ = 0xcccccccc +3@ = 0xcccccccc +4@ = 0xcccccccc +5@ = get_var_pointer 2@ +2301: read_block_from_file 0@ {size} 3 {destination} 5@ // tested opcode +if and + 1@ == 0xcccccccc + 2@ == 0xccaabbcc + 3@ == 0xcccccccc + 4@ == 0xcccccccc +then + trace "~g~~h~~h~2301 (read_block_from_file), #6 PASSED" +else + breakpoint "~r~~h~~h~~h~2301 (read_block_from_file), #6 FAILED!~n~%08x %08x %08x %08x Expected~n~%08x %08x %08x %08x Occured" 0xcccccccc 0xccaabbcc 0xcccccccc 0xcccccccc 1@ 2@ 3@ 4@ +end + + +// read 4 bytes +wait 0 +1@ = 0xcccccccc +2@ = 0xcccccccc +3@ = 0xcccccccc +4@ = 0xcccccccc +5@ = get_var_pointer 2@ +2301: read_block_from_file 0@ {size} 4 {destination} 5@ // tested opcode +if and + 1@ == 0xcccccccc + 2@ == 0x66778899 + 3@ == 0xcccccccc + 4@ == 0xcccccccc +then + trace "~g~~h~~h~2301 (read_block_from_file), #7 PASSED" +else + breakpoint "~r~~h~~h~~h~2301 (read_block_from_file), #7 FAILED!~n~%08x %08x %08x %08x Expected~n~%08x %08x %08x %08x Occured" 0xcccccccc 0x66778899 0xcccccccc 0xcccccccc 1@ 2@ 3@ 4@ +end + + +// read 6 bytes +wait 0 +1@ = 0xcccccccc +2@ = 0xcccccccc +3@ = 0xcccccccc +4@ = 0xcccccccc +5@ = get_var_pointer 2@ +2301: read_block_from_file 0@ {size} 6 {destination} 5@ // tested opcode +if and + 1@ == 0xcccccccc + 2@ == 0x22334455 + 3@ == 0xcccc0011 + 4@ == 0xcccccccc +then + trace "~g~~h~~h~2301 (read_block_from_file), #8 PASSED" +else + breakpoint "~r~~h~~h~~h~2301 (read_block_from_file), #8 FAILED!~n~%08x %08x %08x %08x Expected~n~%08x %08x %08x %08x Occured" 0xcccccccc 0x22334455 0xcccc0011 0xcccccccc 1@ 2@ 3@ 4@ +end + + +// close the file +wait 0 +close_file 0@ +trace "~g~~h~~h~2301 (read_block_from_file), #9 PASSED" + + +terminate_this_custom_script diff --git a/tests/cleo_tests/FilesystemOperations/2302.txt b/tests/cleo_tests/FilesystemOperations/2302.txt new file mode 100644 index 00000000..6d8a34e1 --- /dev/null 
+++ b/tests/cleo_tests/FilesystemOperations/2302.txt @@ -0,0 +1,240 @@ +{$CLEO .s} +{$USE debug} +{$USE file} +{$USE bitwise} +var 0@ : Integer +var 1@ : Integer +var 2@ : Integer +var 3@ : Integer +var 4@ : Integer +var 5@ : Integer +var 6@ : Integer +var 7@ : Integer +var 8@ : Integer +var 9@ : Integer +var 10@ : Integer + +script_name "2302" // write_block_to_file +debug_on + +trace "2302 (write_block_to_file)" + +const Test_Filename = "cleo:\cleo_tests\test_file.dat" + + +// delete old test file if presesnt +wait 0 +if + does_file_exist Test_Filename +then + if + not delete_file Test_Filename + then + breakpoint "~r~~h~~h~~h~2302 (write_block_to_file), Failed to delete previous test's file '%s'!" Test_Filename + end +end + + +// create new file +wait 0 +if + 0@ = open_file Test_Filename {mode} "w" +then + trace "~g~~h~~h~2302 (write_block_to_file), #1 PASSED" +else + breakpoint "~r~~h~~h~~h~2302 (write_block_to_file), #1 FAILED! Failed to create '%s' file." Test_Filename +end + + +// test file size +wait 0 +1@ = get_file_size 0@ +if + 1@ == 0 +then + trace "~g~~h~~h~2302 (write_block_to_file), #2 PASSED" +else + breakpoint "~r~~h~~h~~h~2302 (write_block_to_file), #2 FAILED!~n~0 Expected~n~%d Occured" 1@ +end + + +// write 0 bytes +wait 0 +2@ = 0x33221100 +3@ = 0x77665544 +4@ = 0xBBAA9988 +5@ = 0xFFEECCDD +6@ = get_var_pointer 3@ +2302: write_block_to_file 0@ {size} 0 {source} 6@ // tested opcode +1@ = get_file_size 0@ +if + 1@ == 0 +then + trace "~g~~h~~h~2302 (write_block_to_file), #3 PASSED" +else + breakpoint "~r~~h~~h~~h~2302 (write_block_to_file), #3 FAILED!~n~%d Expected~n~%d Occured" 0 1@ +end + + +// write 1 byte +wait 0 +2@ = 0x33221100 +3@ = 0x77665544 +4@ = 0xBBAA9988 +5@ = 0xFFEECCDD +6@ = get_var_pointer 3@ +2302: write_block_to_file 0@ {size} 1 {source} 6@ // tested opcode +1@ = get_file_size 0@ +if + 1@ == 1 +then + trace "~g~~h~~h~2302 (write_block_to_file), #4 PASSED" +else + breakpoint "~r~~h~~h~~h~2302 (write_block_to_file), #4 FAILED!~n~%d 
Expected~n~%d Occured" 1 1@ +end + + +// write 2 bytes +wait 0 +2@ = 0x33221100 +3@ = 0x77665544 +4@ = 0xBBAA9988 +5@ = 0xFFEECCDD +6@ = get_var_pointer 3@ +2302: write_block_to_file 0@ {size} 2 {source} 6@ // tested opcode +1@ = get_file_size 0@ +if + 1@ == 3 +then + trace "~g~~h~~h~2302 (write_block_to_file), #5 PASSED" +else + breakpoint "~r~~h~~h~~h~2302 (write_block_to_file), #5 FAILED!~n~%d Expected~n~%d Occured" 3 1@ +end + + +// write 3 bytes +wait 0 +2@ = 0x33221100 +3@ = 0x77665544 +4@ = 0xBBAA9988 +5@ = 0xFFEECCDD +6@ = get_var_pointer 3@ +2302: write_block_to_file 0@ {size} 3 {source} 6@ // tested opcode +1@ = get_file_size 0@ +if + 1@ == 6 +then + trace "~g~~h~~h~2302 (write_block_to_file), #6 PASSED" +else + breakpoint "~r~~h~~h~~h~2302 (write_block_to_file), #6 FAILED!~n~%d Expected~n~%d Occured" 6 1@ +end + + +// write 4 bytes +wait 0 +2@ = 0x33221100 +3@ = 0x77665544 +4@ = 0xBBAA9988 +5@ = 0xFFEECCDD +6@ = get_var_pointer 3@ +2302: write_block_to_file 0@ {size} 4 {source} 6@ // tested opcode +1@ = get_file_size 0@ +if + 1@ == 10 +then + trace "~g~~h~~h~2302 (write_block_to_file), #7 PASSED" +else + breakpoint "~r~~h~~h~~h~2302 (write_block_to_file), #7 FAILED!~n~%d Expected~n~%d Occured" 10 1@ +end + + +// write 5 bytes +wait 0 +2@ = 0x33221100 +3@ = 0x77665544 +4@ = 0xBBAA9988 +5@ = 0xFFEECCDD +6@ = get_var_pointer 3@ +2302: write_block_to_file 0@ {size} 5 {source} 6@ // tested opcode +1@ = get_file_size 0@ +if + 1@ == 15 +then + trace "~g~~h~~h~2302 (write_block_to_file), #8 PASSED" +else + breakpoint "~r~~h~~h~~h~2302 (write_block_to_file), #8 FAILED!~n~%d Expected~n~%d Occured" 15 1@ +end + + +// write 9 bytes +wait 0 +2@ = 0x33221100 +3@ = 0x77665544 +4@ = 0xBBAA9988 +5@ = 0xFFEECCDD +6@ = get_var_pointer 3@ +2302: write_block_to_file 0@ {size} 9 {source} 6@ // tested opcode +1@ = get_file_size 0@ +if + 1@ == 24 +then + trace "~g~~h~~h~2302 (write_block_to_file), #9 PASSED" +else + breakpoint "~r~~h~~h~~h~2302 (write_block_to_file), #9 
FAILED!~n~%d Expected~n~%d Occured" 24 1@ +end + + +// close file +wait 0 +close_file 0@ +trace "~g~~h~~h~2302 (write_block_to_file), #10 PASSED" + + +// reopen to read +wait 0 +if + 0@ = open_file Test_Filename {mode} "r" +then + trace "~g~~h~~h~2302 (write_block_to_file), #11 PASSED" +else + breakpoint "~r~~h~~h~~h~2302 (write_block_to_file), #11 FAILED! Failed to open '%s' file." Test_Filename +end + + +// read and verify the data +wait 0 +2@ = 0 +3@ = 0 +4@ = 0 +5@ = 0 +read_from_file 0@ {size} 24 {destination} 2@ +if and + 2@ == 0x44554444 + 3@ == 0x55446655 + 4@ == 0x55447766 + 5@ == 0x44887766 +then + trace "~g~~h~~h~2302 (write_block_to_file), #12 PASSED" +else + breakpoint "~r~~h~~h~~h~2302 (write_block_to_file), #12 FAILED!~n~%08x %08x %08x %08x Expected~n~%08x %08x %08x %08x Occured" 0x44554444 0x55446655 0x55447766 0x44887766 2@ 3@ 4@ 5@ +end + + +// close file +wait 0 +close_file 0@ +trace "~g~~h~~h~2302 (write_block_to_file), #13 PASSED" + + +// delete file +wait 0 +if + delete_file Test_Filename +then + trace "~g~~h~~h~2302 (write_block_to_file), #14 PASSED" +else + breakpoint "~r~~h~~h~~h~2302 (write_block_to_file), #14 FAILED! Failed to delete '%s' file."
Test_Filename +end + + +terminate_this_custom_script diff --git a/tests/cleo_tests/MemoryOperations/2407.txt b/tests/cleo_tests/MemoryOperations/2407.txt new file mode 100644 index 00000000..d842559e --- /dev/null +++ b/tests/cleo_tests/MemoryOperations/2407.txt @@ -0,0 +1,164 @@ +{$CLEO .s} +{$USE debug} +{$USE file} +{$USE bitwise} +var 0@ : Integer +var 1@ : Integer +var 2@ : Integer +var 3@ : Integer +var 4@ : Integer +var 5@ : Integer +var 6@ : Integer +var 7@ : Integer +var 8@ : Integer +var 9@ : Integer +var 10@ : Integer + +script_name "2407" // is_memory_equal +debug_on + +trace "2407 (is_memory_equal)" + + +// compare 0 +wait 0 +0@ = get_label_pointer @DATA_A +1@ = get_label_pointer @DATA_B +if + is_memory_equal 0@ 1@ {size} 0 +then + trace "~g~~h~~h~2407 (is_memory_equal), #1 PASSED" +else + breakpoint "~r~~h~~h~~h~2407 (is_memory_equal), #1 FAILED!~n~Incorrect condition result" +end + + +// compare 1 +wait 0 +0@ = get_label_pointer @DATA_A +1@ = get_label_pointer @DATA_B +if + is_memory_equal 0@ 1@ {size} 1 +then + trace "~g~~h~~h~2407 (is_memory_equal), #2 PASSED" +else + breakpoint "~r~~h~~h~~h~2407 (is_memory_equal), #2 FAILED!~n~Incorrect condition result" +end + + +// compare 2 +wait 0 +0@ = get_label_pointer @DATA_A +1@ = get_label_pointer @DATA_B +if + is_memory_equal 0@ 1@ {size} 2 +then + trace "~g~~h~~h~2407 (is_memory_equal), #3 PASSED" +else + breakpoint "~r~~h~~h~~h~2407 (is_memory_equal), #3 FAILED!~n~Incorrect condition result" +end + + +// compare 3 +wait 0 +0@ = get_label_pointer @DATA_A +1@ = get_label_pointer @DATA_B +if + is_memory_equal 0@ 1@ {size} 3 +then + trace "~g~~h~~h~2407 (is_memory_equal), #4 PASSED" +else + breakpoint "~r~~h~~h~~h~2407 (is_memory_equal), #4 FAILED!~n~Incorrect condition result" +end + + +// compare 4 +wait 0 +0@ = get_label_pointer @DATA_A +1@ = get_label_pointer @DATA_B +if + is_memory_equal 0@ 1@ {size} 4 +then + trace "~g~~h~~h~2407 (is_memory_equal), #5 PASSED" +else + breakpoint "~r~~h~~h~~h~2407 
(is_memory_equal), #5 FAILED!~n~Incorrect condition result" +end + + +// compare 8 +wait 0 +0@ = get_label_pointer @DATA_A +1@ = get_label_pointer @DATA_B +if + is_memory_equal 0@ 1@ {size} 8 +then + trace "~g~~h~~h~2407 (is_memory_equal), #6 PASSED" +else + breakpoint "~r~~h~~h~~h~2407 (is_memory_equal), #6 FAILED!~n~Incorrect condition result" +end + + +// compare 9 +wait 0 +0@ = get_label_pointer @DATA_A +1@ = get_label_pointer @DATA_B +if + not is_memory_equal 0@ 1@ {size} 9 +then + trace "~g~~h~~h~2407 (is_memory_equal), #7 PASSED" +else + breakpoint "~r~~h~~h~~h~2407 (is_memory_equal), #7 FAILED!~n~Incorrect condition result" +end + + +// compare 16 +wait 0 +0@ = get_label_pointer @DATA_A +1@ = get_label_pointer @DATA_B +if + not is_memory_equal 0@ 1@ {size} 16 +then + trace "~g~~h~~h~2407 (is_memory_equal), #8 PASSED" +else + breakpoint "~r~~h~~h~~h~2407 (is_memory_equal), #8 FAILED!~n~Incorrect condition result" +end + + +// compare 8 again in case memory was modified +wait 0 +0@ = get_label_pointer @DATA_A +1@ = get_label_pointer @DATA_B +if + is_memory_equal 0@ 1@ {size} 8 +then + trace "~g~~h~~h~2407 (is_memory_equal), #9 PASSED" +else + breakpoint "~r~~h~~h~~h~2407 (is_memory_equal), #9 FAILED!~n~Incorrect condition result" +end + + +// compare 9 again in case memory was modified +wait 0 +0@ = get_label_pointer @DATA_A +1@ = get_label_pointer @DATA_B +if + not is_memory_equal 0@ 1@ {size} 9 +then + trace "~g~~h~~h~2407 (is_memory_equal), #10 PASSED" +else + breakpoint "~r~~h~~h~~h~2407 (is_memory_equal), #10 FAILED!~n~Incorrect condition result" +end + + +terminate_this_custom_script + + +:DATA_A +hex + "12345678abcdef" 00 +end + +:DATA_B +hex + "1234567812345" 00 +end From a309ebfa43141bdbdb64c37c66dca74d3b9825f1 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Thu, 7 Mar 2024 00:54:18 +0100 Subject: [PATCH 116/216] Restored original return type in CLEO_GetOperandType export. 
(#95) --- cleo_plugins/DebugUtils/DebugUtils.cpp | 4 +-- .../FileSystemOperations.cpp | 6 ++--- cleo_sdk/CLEO.h | 2 +- source/CCustomOpcodeSystem.cpp | 26 +++++++++---------- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/cleo_plugins/DebugUtils/DebugUtils.cpp b/cleo_plugins/DebugUtils/DebugUtils.cpp index d0c1d36b..408e420c 100644 --- a/cleo_plugins/DebugUtils/DebugUtils.cpp +++ b/cleo_plugins/DebugUtils/DebugUtils.cpp @@ -199,13 +199,13 @@ class DebugUtils std::string name = ""; // bool param - blocking - auto paramType = CLEO_GetOperandType(thread); + auto paramType = thread->PeekDataType(); if(paramType == DT_BYTE) { blocking = CLEO_GetIntOpcodeParam(thread) != 0; } - paramType = CLEO_GetOperandType(thread); + paramType = thread->PeekDataType(); if (paramType == eDataType::DT_END) { thread->IncPtr(); // consume arguments terminator diff --git a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp index a8a98217..bd02cc15 100644 --- a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp @@ -86,7 +86,7 @@ class FileSystemOperations { const char* path; - auto paramType = CLEO_GetOperandType(thread); + auto paramType = thread->PeekDataType(); if (IsImmInteger(paramType) || IsVariable(paramType)) { // numbered predefined paths @@ -117,7 +117,7 @@ class FileSystemOperations OPCODE_READ_PARAM_FILEPATH(filename); char mode[16]; - auto paramType = CLEO_GetOperandType(thread); + auto paramType = thread->PeekDataType(); if (IsImmInteger(paramType) || IsVariable(paramType)) { // integer param (for backward compatibility with CLEO 3) @@ -361,7 +361,7 @@ class FileSystemOperations size_t paramCount = 0; SCRIPT_VAR* outputParams[35]; - while (CLEO_GetOperandType(thread) != eDataType::DT_END) + while (thread->PeekDataType() != eDataType::DT_END) { // TODO: if target param is string variable it should be handled correctly 
outputParams[paramCount++] = CLEO_GetPointerToScriptVariable(thread); diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index b51c4e37..625e11a2 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -477,7 +477,7 @@ void WINAPI CLEO_SetScriptWorkDir(CRunningScript* thread, const char* path); void WINAPI CLEO_SetThreadCondResult(CRunningScript* thread, BOOL result); void WINAPI CLEO_ThreadJumpAtLabelPtr(CRunningScript* thread, int labelPtr); -eDataType WINAPI CLEO_GetOperandType(const CRunningScript* thread); // peek parameter data type +int WINAPI CLEO_GetOperandType(const CRunningScript* thread); // peek parameter data type. Returns int for legacy reason, should be eDataType. DWORD WINAPI CLEO_GetVarArgCount(CRunningScript* thread); // peek remaining var-args count extern SCRIPT_VAR* opcodeParams; diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index c0e9b9f8..37a38227 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -467,7 +467,7 @@ namespace CLEO StringParamBufferInfo result; CCustomOpcodeSystem::lastErrorMsg.clear(); - auto paramType = CLEO_GetOperandType(thread); + auto paramType = thread->PeekDataType(); if (IsImmInteger(paramType) || IsVariable(paramType)) { // address to output buffer @@ -570,7 +570,7 @@ namespace CLEO char *buffiter = bufa; //get width - if (CLEO_GetOperandType(thread) == DT_END) goto _ReadFormattedString_ArgMissing; + if (thread->PeekDataType() == DT_END) goto _ReadFormattedString_ArgMissing; GetScriptParams(thread, 1); _itoa(opcodeParams[0].dwParam, buffiter, 10); while (*buffiter) @@ -592,7 +592,7 @@ namespace CLEO if (*iter == '*') { char *buffiter = bufa; - if (CLEO_GetOperandType(thread) == DT_END) goto _ReadFormattedString_ArgMissing; + if (thread->PeekDataType() == DT_END) goto _ReadFormattedString_ArgMissing; GetScriptParams(thread, 1); _itoa(opcodeParams[0].dwParam, buffiter, 10); while (*buffiter) @@ -610,7 +610,7 @@ namespace CLEO { case 's': { - if 
(CLEO_GetOperandType(thread) == DT_END) goto _ReadFormattedString_ArgMissing; + if (thread->PeekDataType() == DT_END) goto _ReadFormattedString_ArgMissing; const char* str = ReadStringParam(thread, bufa, sizeof(bufa)); if(str == nullptr) // read error @@ -630,7 +630,7 @@ namespace CLEO case 'c': if (written++ >= len) goto _ReadFormattedString_OutOfMemory; - if (CLEO_GetOperandType(thread) == DT_END) goto _ReadFormattedString_ArgMissing; + if (thread->PeekDataType() == DT_END) goto _ReadFormattedString_ArgMissing; GetScriptParams(thread, 1); *outIter++ = (char)opcodeParams[0].nParam; iter++; @@ -643,7 +643,7 @@ namespace CLEO char *bufaiter = bufa; if (*iter == 'p' || *iter == 'P') { - if (CLEO_GetOperandType(thread) == DT_END) goto _ReadFormattedString_ArgMissing; + if (thread->PeekDataType() == DT_END) goto _ReadFormattedString_ArgMissing; GetScriptParams(thread, 1); sprintf(bufaiter, "%08X", opcodeParams[0].dwParam); } @@ -656,13 +656,13 @@ namespace CLEO *iter == 'f' || *iter == 'F' || *iter == 'g' || *iter == 'G') { - if (CLEO_GetOperandType(thread) == DT_END) goto _ReadFormattedString_ArgMissing; + if (thread->PeekDataType() == DT_END) goto _ReadFormattedString_ArgMissing; GetScriptParams(thread, 1); sprintf(bufaiter, fmtbufa, opcodeParams[0].fParam); } else { - if (CLEO_GetOperandType(thread) == DT_END) goto _ReadFormattedString_ArgMissing; + if (thread->PeekDataType() == DT_END) goto _ReadFormattedString_ArgMissing; GetScriptParams(thread, 1); sprintf(bufaiter, fmtbufa, opcodeParams[0].pParam); } @@ -691,7 +691,7 @@ namespace CLEO } // still more var-args available - if (CLEO_GetOperandType(thread) != DT_END) + if (thread->PeekDataType() != DT_END) { CCustomOpcodeSystem::lastErrorMsg = "More params than slots in formatted string"; LOG_WARNING(thread, "%s in script %s", CCustomOpcodeSystem::lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); @@ -825,7 +825,7 @@ namespace CLEO void SkipUnusedVarArgs(CRunningScript *thread) { - while 
(CLEO_GetOperandType(thread) != DT_END) + while (thread->PeekDataType() != DT_END) CLEO_SkipOpcodeParams(thread, 1); thread->IncPtr(); // skip terminator @@ -836,7 +836,7 @@ namespace CLEO const auto ip = thread->GetBytePointer(); DWORD count = 0; - while (CLEO_GetOperandType(thread) != DT_END) + while (thread->PeekDataType() != DT_END) { CLEO_SkipOpcodeParams(thread, 1); count++; @@ -2031,9 +2031,9 @@ extern "C" ThreadJump(thread, labelPtr); } - eDataType WINAPI CLEO_GetOperandType(const CLEO::CRunningScript* thread) + int WINAPI CLEO_GetOperandType(const CLEO::CRunningScript* thread) { - return (eDataType )*thread->GetBytePointer(); + return (int)thread->PeekDataType(); } void WINAPI CLEO_RetrieveOpcodeParams(CLEO::CRunningScript *thread, int count) From 6a2146494d8306e3a909815c35c9ffa3d4034f64 Mon Sep 17 00:00:00 2001 From: Seemann Date: Wed, 6 Mar 2024 18:56:37 -0500 Subject: [PATCH 117/216] CLEO testing library (#92) --- .../cleo_tests/FilesystemOperations/0A9A.txt | 68 +++-- tests/cleo_tests/MemoryOperations/0A8C.txt | 254 ++++++------------ tests/cleo_tests/MemoryOperations/0A8D.txt | 213 ++++++--------- tests/cleo_tests/MemoryOperations/0A96.txt | 44 ++- tests/cleo_tests/MemoryOperations/0A97.txt | 51 ++-- tests/cleo_tests/MemoryOperations/0A98.txt | 49 ++-- tests/cleo_tests/MemoryOperations/0AA4.txt | 35 +-- tests/cleo_tests/MemoryOperations/0AC6.txt | 63 ++--- tests/cleo_tests/MemoryOperations/0AC7.txt | 53 ++-- tests/cleo_tests/MemoryOperations/0AC8.txt | 68 ++--- tests/cleo_tests/MemoryOperations/0AC9.txt | 41 ++- tests/cleo_tests/MemoryOperations/0AE9.txt | 48 ++-- tests/cleo_tests/MemoryOperations/0AEA.txt | 52 ++-- tests/cleo_tests/MemoryOperations/0AEB.txt | 58 ++-- tests/cleo_tests/MemoryOperations/0AEC.txt | 57 ++-- tests/cleo_tests/cleo_tester.txt | 170 ++++++++++++ 16 files changed, 600 insertions(+), 724 deletions(-) create mode 100644 tests/cleo_tests/cleo_tester.txt diff --git a/tests/cleo_tests/FilesystemOperations/0A9A.txt 
b/tests/cleo_tests/FilesystemOperations/0A9A.txt index bf7c5b1e..9e1c91f2 100644 --- a/tests/cleo_tests/FilesystemOperations/0A9A.txt +++ b/tests/cleo_tests/FilesystemOperations/0A9A.txt @@ -1,48 +1,40 @@ {$CLEO .s} -{$USE debug} -{$USE file} -{$USE bitwise} -var 0@ : Integer -var 1@ : Integer -var 2@ : Integer -var 3@ : Integer -var 4@ : Integer -var 5@ : Integer -var 6@ : Integer -var 7@ : Integer -var 8@ : Integer -var 9@ : Integer -var 10@ : Integer +{$INCLUDE_ONCE ../cleo_tester.txt} script_name "0A9A" // open_file debug_on -trace "0A9A (open_file)" - +test("0A9A (open_file)", @tests) +terminate_this_custom_script -wait 0 -// try open non existing file -if - // test 0A9A - 0@ = open_file "cleo\not_a_file.txt" {mode} "r" // tested opcode -then - breakpoint "~r~~h~~h~~h~0A9A (open_file), #0 FAILED! Opened non existing file?" -else - trace "~g~~h~~h~0A9A (open_file), #0 PASSED" -end +function tests + + it("should fail on a non-existing file", @test1) + it("should open existing file", @test2) + + return true + + function test1 + if + 0@ = open_file "cleo\not_a_file.txt" {mode} "r" // tested opcode + then + assert(false) + else + assert(true) + end + end + + function test2 + if + 0@ = open_file "cleo\.cleo.log" {mode} "r" // tested opcode + then + assert(true) + close_file 0@ + else + assert(false) + end + end -wait 0 -// try open non existing file -if - // test 0A9A - 0@ = open_file "cleo\.cleo.log" {mode} "r" // tested opcode -then - trace "~g~~h~~h~0A9A (open_file), #1 PASSED" - close_file 0@ -else - breakpoint "~r~~h~~h~~h~0A9A (open_file), #1 FAILED! Failed to open file." 
end - -terminate_this_custom_script diff --git a/tests/cleo_tests/MemoryOperations/0A8C.txt b/tests/cleo_tests/MemoryOperations/0A8C.txt index e80dc1c5..3d78f687 100644 --- a/tests/cleo_tests/MemoryOperations/0A8C.txt +++ b/tests/cleo_tests/MemoryOperations/0A8C.txt @@ -1,180 +1,88 @@ {$CLEO .s} -{$USE debug} -{$USE memory} -var 0@ : Integer -var 1@ : Integer -var 2@ : Integer -var 3@ : Integer -var 4@ : Integer -var 5@ : Integer -var 6@ : Integer -var 7@ : Integer -var 8@ : Integer -var 9@ : Integer -var 10@ : Integer +{$INCLUDE_ONCE ../cleo_tester.txt} -script_name "0A8C" // write_memory -debug_on - -trace "0A8C (write_memory)" - -wait 0 -// write 0 bytes -get_var_pointer 2@ {store_to} 0@ -1@ = 0xcccccccc -2@ = 0xdddddddd -3@ = 0xeeeeeeee - -0A8C: write_memory {address} 0@ {size} 0 {value} 0x11223344 {vp} false // tested opcode - -if and - 1@ == 0xcccccccc - 2@ == 0xdddddddd - 3@ == 0xeeeeeeee -then - trace "~g~~h~~h~0A8C (write_memory), #0 PASSED" -else - breakpoint "~r~~h~~h~~h~0A8C (write_memory), #0 FAILED!~n~cccccccc dddddddd eeeeeeee Expected~n~%08x %08x %08x Occured" 1@ 2@ 3@ -end - - -wait 0 -// write 1 byte -get_var_pointer 2@ {store_to} 0@ -1@ = 0xcccccccc -2@ = 0xdddddddd -3@ = 0xeeeeeeee - -0A8C: write_memory {address} 0@ {size} 1 {value} 0x11223344 {vp} false // tested opcode - -if and - 1@ == 0xcccccccc - 2@ == 0xdddddd44 - 3@ == 0xeeeeeeee -then - trace "~g~~h~~h~0A8C (write_memory), #1 PASSED" -else - breakpoint "~r~~h~~h~~h~0A8C (write_memory), #1 FAILED!~n~cccccccc dddddd44 eeeeeeee Expected~n~%08x %08x %08x Occured" 1@ 2@ 3@ -end - - -wait 0 -// write 2 bytes -get_var_pointer 2@ {store_to} 0@ -1@ = 0xcccccccc -2@ = 0xdddddddd -3@ = 0xeeeeeeee - -0A8C: write_memory {address} 0@ {size} 2 {value} 0x11223344 {vp} false // tested opcode - -if and - 1@ == 0xcccccccc - 2@ == 0xdddd3344 - 3@ == 0xeeeeeeee -then - trace "~g~~h~~h~0A8C (write_memory), #2 PASSED" -else - breakpoint "~r~~h~~h~~h~0A8C (write_memory), #2 FAILED!~n~cccccccc dddd3344 eeeeeeee 
Expected~n~%08x %08x %08x Occured" 1@ 2@ 3@ -end - - -wait 0 -// write 3 bytes -get_var_pointer 2@ {store_to} 0@ -1@ = 0xcccccccc -2@ = 0xdddddddd -3@ = 0xeeeeeeee - -0A8C: write_memory {address} 0@ {size} 3 {value} 0x11223344 {vp} false // tested opcode - -if and - 1@ == 0xcccccccc - 2@ == 0xdd444444 // memset behavior - 3@ == 0xeeeeeeee -then - trace "~g~~h~~h~0A8C (write_memory), #3 PASSED" -else - breakpoint "~r~~h~~h~~h~0A8C (write_memory), #3 FAILED!~n~cccccccc dd444444 eeeeeeee Expected~n~%08x %08x %08x Occured" 1@ 2@ 3@ -end - - -wait 0 -// write 4 bytes -get_var_pointer 2@ {store_to} 0@ -1@ = 0xcccccccc -2@ = 0xdddddddd -3@ = 0xeeeeeeee - -0A8C: write_memory {address} 0@ {size} 4 {value} 0x11223344 {vp} false // tested opcode - -if and - 1@ == 0xcccccccc - 2@ == 0x11223344 - 3@ == 0xeeeeeeee -then - trace "~g~~h~~h~0A8C (write_memory), #4 PASSED" -else - breakpoint "~r~~h~~h~~h~0A8C (write_memory), #4 FAILED!~n~cccccccc 11223344 eeeeeeee Expected~n~%08x %08x %08x Occured" 1@ 2@ 3@ -end - - -wait 0 -// write 5 bytes -get_var_pointer 2@ {store_to} 0@ -1@ = 0xcccccccc -2@ = 0xdddddddd -3@ = 0xeeeeeeee - -0A8C: write_memory {address} 0@ {size} 5 {value} 0x11223344 {vp} false // tested opcode - -if and - 1@ == 0xcccccccc - 2@ == 0x44444444 - 3@ == 0xeeeeee44 -then - trace "~g~~h~~h~0A8C (write_memory), #5 PASSED" -else - breakpoint "~r~~h~~h~~h~0A8C (write_memory), #5 FAILED! Expected: cccccccc, 44444444, eeeeee44 Actual: %08x %08x %08x" 1@ 2@ 3@ -end - - -wait 0 -// write 7 bytes -get_var_pointer 2@ {store_to} 0@ -1@ = 0xcccccccc -2@ = 0xdddddddd -3@ = 0xeeeeeeee - -0A8C: write_memory {address} 0@ {size} 7 {value} 0x11223344 {vp} false // tested opcode - -if and - 1@ == 0xcccccccc - 2@ == 0x44444444 - 3@ == 0xee444444 -then - trace "~g~~h~~h~0A8C (write_memory), #6 PASSED" -else - breakpoint "~r~~h~~h~~h~0A8C (write_memory), #6 FAILED! 
Expected: cccccccc, 44444444, ee444444 Actual: %08x %08x %08x" 1@ 2@ 3@ -end +script_name '0A8C' +test("0A8C (write_memory)", @tests) +terminate_this_custom_script -wait 0 -// write float -get_var_pointer 2@ {store_to} 0@ -1@ = 0xcccccccc -2@ = 0xdddddddd -3@ = 0xeeeeeeee -4@ = 100.0 +function tests + before_each(@before) + + it("should write 0 bytes", @test1) + it("should write 1 byte", @test2) + it("should write 2 bytes", @test3) + it("should write 3 bytes", @test4) + it("should write 4 bytes", @test5) + it("should write 5 bytes", @test6) + it("should write 7 bytes", @test7) + it("should write float", @test8) + + return true + + :before + get_var_pointer 2@ {store_to} 0@ + 1@ = 0xcccccccc + 2@ = 0xdddddddd + 3@ = 0xeeeeeeee + 0051: return + + function test1 + write_memory {address} 0@ {size} 0 {value} 0x11223344 {vp} false + assert_eq(1@, 0xcccccccc) + assert_eq(2@, 0xdddddddd) + assert_eq(3@, 0xeeeeeeee) + end + + function test2 + write_memory {address} 0@ {size} 1 {value} 0x11223344 {vp} false + assert_eq(1@, 0xcccccccc) + assert_eq(2@, 0xdddddd44) + assert_eq(3@, 0xeeeeeeee) + end + + function test3 + write_memory {address} 0@ {size} 2 {value} 0x11223344 {vp} false + assert_eq(1@, 0xcccccccc) + assert_eq(2@, 0xdddd3344) + assert_eq(3@, 0xeeeeeeee) + end + + function test4 + write_memory {address} 0@ {size} 3 {value} 0x11223344 {vp} false + assert_eq(1@, 0xcccccccc) + assert_eq(2@, 0xdd444444) // memset behavior + assert_eq(3@, 0xeeeeeeee) + end + + function test5 + write_memory {address} 0@ {size} 4 {value} 0x11223344 {vp} false + assert_eq(1@, 0xcccccccc) + assert_eq(2@, 0x11223344) + assert_eq(3@, 0xeeeeeeee) + end + + function test6 + write_memory {address} 0@ {size} 5 {value} 0x11223344 {vp} false + assert_eq(1@, 0xcccccccc) + assert_eq(2@, 0x44444444) + assert_eq(3@, 0xeeeeee44) + end + + function test7 + write_memory {address} 0@ {size} 7 {value} 0x11223344 {vp} false + assert_eq(1@, 0xcccccccc) + assert_eq(2@, 0x44444444) + assert_eq(3@, 0xee444444) + 
end + + function test8 + 4@ = 100.0 + 0A8C: write_memory {address} 0@ {size} 4 {value} 4@ {vp} false // tested opcode + assert_eq(1@, 0xcccccccc) + assert_eqf(2@, 100.0) + assert_eq(3@, 0xeeeeeeee) + end -0A8C: write_memory {address} 0@ {size} 4 {value} 4@ {vp} false // tested opcode -if - 2@ == 100.0 -then - trace "~g~~h~~h~0A8C (write_memory), #7 PASSED" -else - breakpoint "~r~~h~~h~~h~0A8C (write_memory), #7 FAILED!~n~cccccccc 100.0 eeeeeeee Expected~n~%08x %f %08x Occured" 1@ 2@ 3@ end - - -terminate_this_custom_script diff --git a/tests/cleo_tests/MemoryOperations/0A8D.txt b/tests/cleo_tests/MemoryOperations/0A8D.txt index 17bf80e3..4bca250f 100644 --- a/tests/cleo_tests/MemoryOperations/0A8D.txt +++ b/tests/cleo_tests/MemoryOperations/0A8D.txt @@ -1,142 +1,81 @@ {$CLEO .s} -{$USE debug} -{$USE memory} -var 0@ : Integer -var 1@ : Integer -var 2@ : Integer -var 3@ : Integer -var 4@ : Integer -var 5@ : Integer -var 6@ : Integer -var 7@ : Integer -var 8@ : Integer -var 9@ : Integer -var 10@ : Integer - -script_name "0A8D" // read_memory -debug_on - -trace "0A8D (read_memory)" - -wait 0 -// read 0 bytes -0AC6: get_label_pointer @DATA {store_to} 0@ -1@ = 0xcccccccc -2@ = 0xdddddddd -3@ = 0xeeeeeeee - -0A8D: read_memory {address} 0@ {size} 0 {vp} false {result} 2@ // tested opcode - -if and - 1@ == 0xcccccccc - 2@ == 0x00000000 - 3@ == 0xeeeeeeee -then - trace "~g~~h~~h~0A8D (read_memory), #0 PASSED" -else - breakpoint "~r~~h~~h~~h~0A8D (read_memory), #0 FAILED!~n~cccccccc 00000000 eeeeeeee Expected~n~%08x %08x %08x Occured" 1@ 2@ 3@ -end - - -wait 0 -// read 0 bytes -0AC6: get_label_pointer @DATA {store_to} 0@ -1@ = 0xcccccccc -2@ = 0xdddddddd -3@ = 0xeeeeeeee - -0A8D: read_memory {address} 0@ {size} 1 {vp} false {result} 2@ // tested opcode - -if and - 1@ == 0xcccccccc - 2@ == 0x00000044 - 3@ == 0xeeeeeeee -then - trace "~g~~h~~h~0A8D (read_memory), #1 PASSED" -else - breakpoint "~r~~h~~h~~h~0A8D (read_memory), #1 FAILED!~n~cccccccc 00000044 eeeeeeee 
Expected~n~%08x %08x %08x Occured" 1@ 2@ 3@ -end - - -wait 0 -// read 2 bytes -0AC6: get_label_pointer @DATA {store_to} 0@ -1@ = 0xcccccccc -2@ = 0xdddddddd -3@ = 0xeeeeeeee - -0A8D: read_memory {address} 0@ {size} 2 {vp} false {result} 2@ // tested opcode - -if and - 1@ == 0xcccccccc - 2@ == 0x00003344 - 3@ == 0xeeeeeeee -then - trace "~g~~h~~h~0A8D (read_memory), #2 PASSED" -else - breakpoint "~r~~h~~h~~h~0A8D (read_memory), #2 FAILED!~n~cccccccc 00003344 eeeeeeee Expected~n~%08x %08x %08x Occured" 1@ 2@ 3@ -end - - -wait 0 -// read 3 bytes -0AC6: get_label_pointer @DATA {store_to} 0@ -1@ = 0xcccccccc -2@ = 0xdddddddd -3@ = 0xeeeeeeee - -0A8D: read_memory {address} 0@ {size} 3 {vp} false {result} 2@ // tested opcode - -if and - 1@ == 0xcccccccc - 2@ == 0x00223344 - 3@ == 0xeeeeeeee -then - trace "~g~~h~~h~0A8D (read_memory), #3 PASSED" -else - breakpoint "~r~~h~~h~~h~0A8D (read_memory), #3 FAILED!~n~cccccccc 00223344 eeeeeeee Expected~n~%08x %08x %08x Occured" 1@ 2@ 3@ -end - - -wait 0 -// read 4 bytes -0AC6: get_label_pointer @DATA {store_to} 0@ -1@ = 0xcccccccc -2@ = 0xdddddddd -3@ = 0xeeeeeeee - -0A8D: read_memory {address} 0@ {size} 4 {vp} false {result} 2@ // tested opcode - -if and - 1@ == 0xcccccccc - 2@ == 0x11223344 - 3@ == 0xeeeeeeee -then - trace "~g~~h~~h~0A8D (read_memory), #4 PASSED" -else - breakpoint "~r~~h~~h~~h~0A8D (read_memory), #4 FAILED!~n~cccccccc 11223344 eeeeeeee Expected~n~%08x %08x %08x Occured" 1@ 2@ 3@ -end - -wait 0 -// read float value -0@ = 125.0 -2@ = 0 -get_var_pointer 0@ {store_to} 1@ -0A8D: read_memory {address} 1@ {size} 4 {vp} false {result} 2@ - -if - 2@ == 125.0 -then - trace "~g~~h~~h~0A8D (read_memory), #5 PASSED" -else - breakpoint "~r~~h~~h~~h~0A8D (read_memory), #5 FAILED!~n~125.0 Expected~n~%f Occured" 2@ -end +{$INCLUDE_ONCE ../cleo_tester.txt} +script_name '0A8D' +test("0A8D (read_memory)", @tests) terminate_this_custom_script - -:DATA -hex - 44 33 22 11 - "some longer testing text" 00 +function tests + 
before_each(@prepare_tests) + + it("should read 0 bytes", @test1) + it("should read 1 byte", @test2) + it("should read 2 bytes", @test3) + it("should read 3 bytes", @test4) + it("should read 4 bytes", @test5) + it("should read float", @test6) + + return true + + :prepare_tests + 0@ = get_label_pointer @DATA + 1@ = 0xcccccccc + 2@ = 0xdddddddd + 3@ = 0xeeeeeeee + 0051: return // sanny does not allow single `return` command in function context + + function test1 + 2@ = read_memory {address} 0@ {size} 0 {vp} false + + assert_eq(1@, 0xcccccccc) + assert_eq(2@, 0x00000000) + assert_eq(3@, 0xeeeeeeee) + end + + function test2 + 2@ = read_memory {address} 0@ {size} 1 {vp} false + + assert_eq(1@, 0xcccccccc) + assert_eq(2@, 0x00000044) + assert_eq(3@, 0xeeeeeeee) + end + + function test3 + 2@ = read_memory {address} 0@ {size} 2 {vp} false + + assert_eq(1@, 0xcccccccc) + assert_eq(2@, 0x00003344) + assert_eq(3@, 0xeeeeeeee) + end + + function test4 + 2@ = read_memory {address} 0@ {size} 3 {vp} false + + assert_eq(1@, 0xcccccccc) + assert_eq(2@, 0x00223344) + assert_eq(3@, 0xeeeeeeee) + end + + function test5 + 2@ = read_memory {address} 0@ {size} 4 {vp} false + + assert_eq(1@, 0xcccccccc) + assert_eq(2@, 0x11223344) + assert_eq(3@, 0xeeeeeeee) + end + + function test6 + 0@ = 125.0 + 2@ = 0 + get_var_pointer 0@ {store_to} 1@ + 2@ = read_memory {address} 1@ {size} 4 {vp} false + + assert_eq(2@, 125.0) + end + + :DATA + hex + 44 33 22 11 + "some longer testing text" 00 + end end diff --git a/tests/cleo_tests/MemoryOperations/0A96.txt b/tests/cleo_tests/MemoryOperations/0A96.txt index c6260775..4fb436b1 100644 --- a/tests/cleo_tests/MemoryOperations/0A96.txt +++ b/tests/cleo_tests/MemoryOperations/0A96.txt @@ -1,34 +1,20 @@ {$CLEO .s} -{$USE debug} -{$USE memory} -var 0@ : Integer -var 1@ : Integer -var 2@ : Integer -var 3@ : Integer -var 4@ : Integer -var 5@ : Integer -var 6@ : Integer -var 7@ : Integer -var 8@ : Integer -var 9@ : Integer -var 10@ : Integer +{$INCLUDE_ONCE 
../cleo_tester.txt} script_name "0A96" // get_ped_pointer -debug_on - -trace "0A96 (get_ped_pointer)" - -wait 0 -get_player_char 0 {handle} 0@ -1@ = 0 -0A96: get_ped_pointer 0@ {address} 1@ +test("0A96 (get_ped_pointer)", @tests) +terminate_this_custom_script -if - 1@ > 0x10000 // possibly valid pointer -then - trace "~g~~h~~h~0A96 (get_ped_pointer), #0 PASSED" -else - breakpoint "~r~~h~~h~~h~0A96 (get_ped_pointer), #0 FAILED!~n~%d" 1@ 2@ 3@ -end +function tests -terminate_this_custom_script + it("should return valid pointer", @test1) + + return true + + function test1 + int handle = get_player_char 0 + int ptr = get_ped_pointer handle + + assert_ptr(ptr) + end +end \ No newline at end of file diff --git a/tests/cleo_tests/MemoryOperations/0A97.txt b/tests/cleo_tests/MemoryOperations/0A97.txt index 9b8d1098..034ad422 100644 --- a/tests/cleo_tests/MemoryOperations/0A97.txt +++ b/tests/cleo_tests/MemoryOperations/0A97.txt @@ -1,41 +1,24 @@ {$CLEO .s} -{$USE debug} -{$USE memory} -var 0@ : Integer -var 1@ : Integer -var 2@ : Integer -var 3@ : Integer -var 4@ : Integer -var 5@ : Integer -var 6@ : Integer -var 7@ : Integer -var 8@ : Integer -var 9@ : Integer -var 10@ : Integer +{$INCLUDE_ONCE ../cleo_tester.txt} script_name "0A97" // get_vehicle_pointer -debug_on - -trace "0A97 (get_vehicle_pointer)" - - -wait 0 -request_model 400 -load_all_models_now -create_car 400 {xyz} 0.0 0.0 0.0 {result} 0@ - -1@ = 0 -0A97: get_vehicle_pointer 0@ {result} 1@ // tested opcode +test("0A97 (get_vehicle_pointer)", @tests) +terminate_this_custom_script -mark_car_as_no_longer_needed 0@ +function tests -if - 1@ > 0x10000 // possibly valid pointer -then - trace "~g~~h~~h~0A97 (get_vehicle_pointer), #0 PASSED" -else - breakpoint "~r~~h~~h~~h~0A97 (get_vehicle_pointer), #0 FAILED!~n~%d" 1@ 2@ 3@ -end + it("should return a valid pointer", @test1) + return true + + function test1 + request_model 400 + load_all_models_now + int handle = create_car 400 {xyz} 0.0 0.0 0.0 + int ptr = 0 + ptr = 
get_vehicle_pointer handle + mark_car_as_no_longer_needed handle -terminate_this_custom_script + assert_ptr(ptr) + end +end \ No newline at end of file diff --git a/tests/cleo_tests/MemoryOperations/0A98.txt b/tests/cleo_tests/MemoryOperations/0A98.txt index 2d8c264f..3b6bf28f 100644 --- a/tests/cleo_tests/MemoryOperations/0A98.txt +++ b/tests/cleo_tests/MemoryOperations/0A98.txt @@ -1,41 +1,24 @@ {$CLEO .s} -{$USE debug} -{$USE memory} -var 0@ : Integer -var 1@ : Integer -var 2@ : Integer -var 3@ : Integer -var 4@ : Integer -var 5@ : Integer -var 6@ : Integer -var 7@ : Integer -var 8@ : Integer -var 9@ : Integer -var 10@ : Integer +{$INCLUDE_ONCE ../cleo_tester.txt} script_name "0A98" // get_vehicle_pointer -debug_on - -trace "0A98 (get_object_pointer)" - - -wait 0 -request_model 333 // golf club -load_all_models_now -create_object 333 {xyz} 0.0 0.0 0.0 {result} 0@ +test("0A98 (get_object_pointer)", @tests) +terminate_this_custom_script -1@ = 0 -0A98: get_object_pointer 0@ {result} 1@ // tested opcode +function tests -mark_object_as_no_longer_needed 0@ + it("should return a valid pointer", @test1) + return true -if - 1@ > 0x10000 // possibly valid pointer -then - trace "~g~~h~~h~0A98 (get_object_pointer), #0 PASSED" -else - breakpoint "~r~~h~~h~~h~0A98 (get_object_pointer), #0 FAILED!~n~%d" 1@ 2@ 3@ -end + function test1 + request_model 333 // golf club + load_all_models_now + int handle = create_object 333 {xyz} 0.0 0.0 0.0 + int ptr = 0 + ptr = get_object_pointer handle + mark_object_as_no_longer_needed handle -terminate_this_custom_script + assert_ptr(ptr) + end +end \ No newline at end of file diff --git a/tests/cleo_tests/MemoryOperations/0AA4.txt b/tests/cleo_tests/MemoryOperations/0AA4.txt index fe3111f3..2192b551 100644 --- a/tests/cleo_tests/MemoryOperations/0AA4.txt +++ b/tests/cleo_tests/MemoryOperations/0AA4.txt @@ -1,22 +1,27 @@ {$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.txt} script_name "0AA4" -debug_on +test("0AA4 (get_dynamic_library_procedure)", 
@tests) +terminate_this_custom_script -trace "0AA4 get_dynamic_library_procedure" -wait 0 +function tests -int load_library_addr = read_memory 0x858070 4 false -trace "Address of LoadLibrary function is %d" load_library_addr + it("should return address of Sleep function from kernel32.dll", @test1) + return true + + function test1 + int load_library_addr = read_memory 0x858070 4 false + trace "Address of LoadLibrary function is %d" load_library_addr -int kernel_dll_addr = call_function_return {address} load_library_addr {numParams} 1 {pop} 0 {funcParams} "kernel32.dll" // tested opcode -if - // lib address can be any valid pointer, not necessarily one loaded with 0AA2 opcode - int sleep_addr = get_dynamic_library_procedure {procName} "Sleep" {DynamicLibrary} kernel_dll_addr -then - trace "~g~~h~~h~0AA4 (get_dynamic_library_procedure), #0 PASSED" -else - breakpoint "~r~~h~~h~~h~0AA4 (get_dynamic_library_procedure), #0 FAILED! Can't find procedure" + int kernel_dll_addr = call_function_return {address} load_library_addr {numParams} 1 {pop} 0 {funcParams} "kernel32.dll" // tested opcode + if + // lib address can be any valid pointer, not necessarily one loaded with 0AA2 opcode + int sleep_addr = get_dynamic_library_procedure {procName} "Sleep" {DynamicLibrary} kernel_dll_addr + then + assert(true) + else + assert(false) + end + end end - -terminate_this_custom_script diff --git a/tests/cleo_tests/MemoryOperations/0AC6.txt b/tests/cleo_tests/MemoryOperations/0AC6.txt index cbe00ce5..3ae125fe 100644 --- a/tests/cleo_tests/MemoryOperations/0AC6.txt +++ b/tests/cleo_tests/MemoryOperations/0AC6.txt @@ -1,45 +1,28 @@ {$CLEO .s} -{$USE debug} -{$USE memory} -var 0@ : Integer -var 1@ : Integer -var 2@ : Integer -var 3@ : Integer -var 4@ : Integer -var 5@ : Integer -var 6@ : Integer -var 7@ : Integer -var 8@ : Integer -var 9@ : Integer -var 10@ : Integer - -script_name "0AC6" // get_vehicle_pointer -debug_on - -trace "0AC6 (get_label_pointer)" - - -wait 0 -1@ = 0 -0AC6: 
get_label_pointer @DATA {result} 1@ // tested opcode - -2@ = 0xCCCCCCCC -read_memory 1@ {size} 4 {vp} false {result} 2@ - -if and - 1@ > 0x10000 // possibly valid pointer - 2@ == 0x11223344 -then - trace "~g~~h~~h~0AC6 (get_label_pointer), #0 PASSED" -else - breakpoint "~r~~h~~h~~h~0AC6 (get_label_pointer), #0 FAILED!~n~11223344 Expected~n~%08x Occured" 2@ -end - +{$INCLUDE_ONCE ../cleo_tester.txt} +script_name "0AC6" // get_label_pointer +test("0AC6 (get_label_pointer)", @tests) terminate_this_custom_script -:DATA -hex - 44 33 22 11 - "some longer testing text" 00 +function tests + + it("should return valid pointer", @test1) + return true + + function test1 + int ptr = 0 + ptr = get_label_pointer @DATA + int number = 0xCCCCCCCC + number = read_memory ptr {size} 4 {vp} false + assert_ptr(ptr) + assert_eq(number, 0x11223344) + end + + :DATA + hex + 44 33 22 11 + "some longer testing text" 00 + end + end diff --git a/tests/cleo_tests/MemoryOperations/0AC7.txt b/tests/cleo_tests/MemoryOperations/0AC7.txt index 64c9b2f8..f202c8d6 100644 --- a/tests/cleo_tests/MemoryOperations/0AC7.txt +++ b/tests/cleo_tests/MemoryOperations/0AC7.txt @@ -1,39 +1,30 @@ {$CLEO .s} -{$USE debug} -{$USE memory} -var 0@ : Integer -var 1@ : Integer -var 2@ : Integer -var 3@ : Integer -var 4@ : Integer -var 5@ : Integer -var 6@ : Integer -var 7@ : Integer -var 8@ : Integer -var 9@ : Integer -var 10@ : Integer +{$INCLUDE_ONCE ../cleo_tester.txt} -script_name "0AC7" // get_vehicle_pointer -debug_on - -trace "0AC7 (get_var_pointer)" +script_name "0AC7" // get_var_pointer +test("0AC7 (get_var_pointer)", @tests) +terminate_this_custom_script +function tests -wait 0 -1@ = 0x11223344 -0AC7: get_var_pointer 1@ {result} 0@ // tested opcode + it("should return valid pointer", @test1) + return true -2@ = 0xCCCCCCCC -read_memory 0@ {size} 4 {vp} false {result} 2@ + function test1 + int val1 = 0x11223344 + int val2 = 0xCCCCCCCC + int ptr = get_var_pointer val1 + val2 = read_memory ptr {size} 4 {vp} false 
-if and - 0@ > 0x10000 // possibly valid pointer - 2@ == 0x11223344 -then - trace "~g~~h~~h~0AC7 (get_var_pointer), #0 PASSED" -else - breakpoint "~r~~h~~h~~h~0AC7 (get_var_pointer), #0 FAILED!~n~11223344 Expected~n~%08x Occured" 2@ -end + assert_ptr(ptr) + assert_eq(val1, 0x11223344) + assert_eq(val2, 0x11223344) + end + :DATA + hex + 44 33 22 11 + "some longer testing text" 00 + end -terminate_this_custom_script +end diff --git a/tests/cleo_tests/MemoryOperations/0AC8.txt b/tests/cleo_tests/MemoryOperations/0AC8.txt index 86569a8f..03d92f0a 100644 --- a/tests/cleo_tests/MemoryOperations/0AC8.txt +++ b/tests/cleo_tests/MemoryOperations/0AC8.txt @@ -1,50 +1,40 @@ {$CLEO .s} -{$USE debug} -{$USE memory} -var 0@ : Integer -var 1@ : Integer -var 2@ : Integer -var 3@ : Integer -var 4@ : Integer -var 5@ : Integer -var 6@ : Integer -var 7@ : Integer -var 8@ : Integer -var 9@ : Integer -var 10@ : Integer +{$INCLUDE_ONCE ../cleo_tester.txt} script_name "0AC8" // allocate_memory -debug_on - -trace "0AC8 (allocate_memory)" +test("0AC8 (allocate_memory)", @tests) +terminate_this_custom_script +function tests -wait 0 -0@ = 0x11223344 -0AC8: allocate_memory {size} 4 {result} 0@ // tested opcode + before_each(@allocate) + after_each(@free) + + it("should return valid pointer", @test1) + it("should point to zero-filled mem in CLEO5", @test2) -if and - 0@ > 0x10000 // possibly valid pointer - 0@ <> 0x11223344 -then - trace "~g~~h~~h~0AC8 (allocate_memory), #0 PASSED" -else - breakpoint "~r~~h~~h~~h~0AC8 (allocate_memory), #0 FAILED!~n~11223344 Expected~n~%08x Occured" 2@ -end + return true + function test1 + // 0@ is set in before_each callback + assert_ptr(0@) + assert_neq(0@, 0x11223344) + end + + function test2 + 2@ = 0xCCCCCCCC + 2@ = read_memory 0@ {size} 4 {vp} false + assert_eq(2@, 0) + end -wait 0 -// in CLEO5 expect to alloceted memory be prefilled with zeros -2@ = 0xCCCCCCCC -read_memory 0@ {size} 4 {vp} false {result} 2@ + :allocate + 0@ = 0x11223344 + 0@ = 
allocate_memory {size} 4 + 0051: return -if - 2@ == 0 -then - trace "~g~~h~~h~0AC8 (allocate_memory), #1 PASSED" -else - breakpoint "~r~~h~~h~~h~0AC8 (allocate_memory), #1 FAILED!~n~00000000 Expected~n~%08x Occured" 2@ -end + :free + free_memory {address} 0@ + 0051: return -terminate_this_custom_script +end \ No newline at end of file diff --git a/tests/cleo_tests/MemoryOperations/0AC9.txt b/tests/cleo_tests/MemoryOperations/0AC9.txt index e6e31a90..120eb3f1 100644 --- a/tests/cleo_tests/MemoryOperations/0AC9.txt +++ b/tests/cleo_tests/MemoryOperations/0AC9.txt @@ -1,32 +1,21 @@ {$CLEO .s} -{$USE debug} -{$USE memory} -var 0@ : Integer -var 1@ : Integer -var 2@ : Integer -var 3@ : Integer -var 4@ : Integer -var 5@ : Integer -var 6@ : Integer -var 7@ : Integer -var 8@ : Integer -var 9@ : Integer -var 10@ : Integer +{$INCLUDE_ONCE ../cleo_tester.txt} script_name "0AC9" // free_memory -debug_on - -trace "0AC9 (free_memory)" - - -wait 0 -0@ = 0x11223344 -allocate_memory {size} 4 {result} 0@ - -0AC9: free_memory 0@ // tested opcode +test("0AC9 (free_memory)", @tests) +terminate_this_custom_script -// not much to check within script. Did not crashed the game or printed error, so perhaps ok -trace "~g~~h~~h~0AC9 (free_memory), #0 PASSED" +function tests + it("should free allocated memory", @test1) + return true + + function test1 + int ptr = 0x11223344 + ptr = allocate_memory {size} 4 + free_memory ptr -terminate_this_custom_script + // not much to check within script. 
Did not crashed the game or printed error, so perhaps ok + assert(true) + end +end \ No newline at end of file diff --git a/tests/cleo_tests/MemoryOperations/0AE9.txt b/tests/cleo_tests/MemoryOperations/0AE9.txt index a5223108..6aa9ece7 100644 --- a/tests/cleo_tests/MemoryOperations/0AE9.txt +++ b/tests/cleo_tests/MemoryOperations/0AE9.txt @@ -1,38 +1,30 @@ {$CLEO .s} -{$USE debug} -{$USE memory} -var 0@ : Integer -var 1@ : Integer -var 2@ : Integer -var 3@ : Integer -var 4@ : Integer -var 5@ : Integer -var 6@ : Integer -var 7@ : Integer -var 8@ : Integer -var 9@ : Integer -var 10@ : Integer +{$INCLUDE_ONCE ../cleo_tester.txt} script_name "0AE9" // pop_float -debug_on +test("0AE9 (pop_float)", @tests) +terminate_this_custom_script -trace "0AE9 (pop_float)" +function tests + it("should pop float from stack", @test1) + return true -wait 0 -0@s = '42.5' -get_var_pointer 0@ {result} 2@ -call_function 0x0823CEE {argCount} 1 {pop} 1 {arg} 2@ // double atof(const char *) -0AE9: pop_float {result} 3@ // tested opcode + function test1 + 0@s = '42.5' + 2@ = get_var_pointer 0@ + call_function 0x0823CEE {argCount} 1 {pop} 1 {arg} 2@ // double atof(const char *) -if - 3@ == 42.5 -then - trace "~g~~h~~h~0AE9 (pop_float), #0 PASSED" -else - breakpoint "~r~~h~~h~~h~0AE9 (pop_float), #0 FAILED!~n~42.5 Expected~n~%f Occured" 3@ -end + pop_float {result} 3@ + if + 3@ == 42.5 + then + assert(true) + else + assert(false) + end -terminate_this_custom_script + end +end diff --git a/tests/cleo_tests/MemoryOperations/0AEA.txt b/tests/cleo_tests/MemoryOperations/0AEA.txt index 951463b8..fc1b5de2 100644 --- a/tests/cleo_tests/MemoryOperations/0AEA.txt +++ b/tests/cleo_tests/MemoryOperations/0AEA.txt @@ -1,37 +1,25 @@ {$CLEO .s} -{$USE debug} -{$USE memory} -var 0@ : Integer -var 1@ : Integer -var 2@ : Integer -var 3@ : Integer -var 4@ : Integer -var 5@ : Integer -var 6@ : Integer -var 7@ : Integer -var 8@ : Integer -var 9@ : Integer -var 10@ : Integer +{$INCLUDE_ONCE ../cleo_tester.txt} 
script_name "0AEA" // get_ped_ref -debug_on - -trace "0AEA (get_ped_ref)" - - -wait 0 -get_player_char 0 {handle} 0@ -get_ped_pointer 0@ {address} 1@ - -0AEA: get_ped_ref 1@ {result} 2@ // tested opcode +test("0AEA (get_ped_ref)", @tests) +terminate_this_custom_script -if - 0@ == 2@ -then - trace "~g~~h~~h~0AEA (get_ped_ref), #0 PASSED" -else - breakpoint "~r~~h~~h~~h~0AEA (get_ped_ref), #0 FAILED!~n~%08x Expected~n~%08x Occured" 0@ 2@ +function tests + it("should return ped handle for pointer", @test1) + return true + + function test1 + int handle = get_player_char 0 + int ptr = get_ped_pointer handle + int handle2 = get_ped_ref ptr + + if + handle == handle2 + then + assert(true) + else + assert(false) + end + end end - - -terminate_this_custom_script diff --git a/tests/cleo_tests/MemoryOperations/0AEB.txt b/tests/cleo_tests/MemoryOperations/0AEB.txt index 0c804131..9647cada 100644 --- a/tests/cleo_tests/MemoryOperations/0AEB.txt +++ b/tests/cleo_tests/MemoryOperations/0AEB.txt @@ -1,41 +1,29 @@ {$CLEO .s} -{$USE debug} -{$USE memory} -var 0@ : Integer -var 1@ : Integer -var 2@ : Integer -var 3@ : Integer -var 4@ : Integer -var 5@ : Integer -var 6@ : Integer -var 7@ : Integer -var 8@ : Integer -var 9@ : Integer -var 10@ : Integer +{$INCLUDE_ONCE ../cleo_tester.txt} script_name "0AEB" // get_vehicle_ref -debug_on - -trace "0AEB (get_vehicle_ref)" - - -wait 0 -request_model 400 -load_all_models_now -create_car 400 {xyz} 0.0 0.0 0.0 {result} 0@ -get_vehicle_pointer 0@ {result} 1@ - -0AEB: get_vehicle_ref 1@ {result} 2@ // tested opcode - -mark_car_as_no_longer_needed 0@ +test("0AEB (get_vehicle_ref)", @tests) +terminate_this_custom_script -if - 0@ == 2@ -then - trace "~g~~h~~h~0AEA (get_vehicle_ref), #0 PASSED" -else - breakpoint "~r~~h~~h~~h~0AEA (get_vehicle_ref), #0 FAILED!~n~%08x Expected~n~%08x Occured" 0@ 2@ +function tests + it("should return vehicle handle for pointer", @test1) + return true + + function test1 + request_model 400 + load_all_models_now + int 
handle = create_car 400 {xyz} 0.0 0.0 0.0 + int ptr = get_vehicle_pointer handle + int handle2 = get_vehicle_ref ptr + mark_car_as_no_longer_needed handle + + if + handle == handle2 + then + assert(true) + else + assert(false) + end + end end - -terminate_this_custom_script diff --git a/tests/cleo_tests/MemoryOperations/0AEC.txt b/tests/cleo_tests/MemoryOperations/0AEC.txt index 4b60dac2..57333051 100644 --- a/tests/cleo_tests/MemoryOperations/0AEC.txt +++ b/tests/cleo_tests/MemoryOperations/0AEC.txt @@ -1,41 +1,30 @@ {$CLEO .s} -{$USE debug} -{$USE memory} -var 0@ : Integer -var 1@ : Integer -var 2@ : Integer -var 3@ : Integer -var 4@ : Integer -var 5@ : Integer -var 6@ : Integer -var 7@ : Integer -var 8@ : Integer -var 9@ : Integer -var 10@ : Integer +{$INCLUDE_ONCE ../cleo_tester.txt} script_name "0AEC" // get_object_ref -debug_on - -trace "0AEC (get_object_ref)" - - -wait 0 -request_model 333 // golf club -load_all_models_now -create_object 333 {xyz} 0.0 0.0 0.0 {result} 0@ -get_object_pointer 0@ {result} 1@ - -0AEC: get_object_ref 1@ {result} 2@ // tested opcode - -mark_object_as_no_longer_needed 0@ +test("0AEC (get_object_ref)", @tests) +terminate_this_custom_script -if - 0@ == 2@ -then - trace "~g~~h~~h~0AEC (get_object_ref), #0 PASSED" -else - breakpoint "~r~~h~~h~~h~0AEC (get_object_ref), #0 FAILED!~n~%08x Expected~n~%08x Occured" 0@ 2@ +function tests + it("should return vehicle handle for pointer", @test1) + return true + + function test1 + request_model 333 // golf club + load_all_models_now + int handle = create_object 333 {xyz} 0.0 0.0 0.0 + int ptr = get_object_pointer handle + int handle2 = get_object_ref ptr + mark_object_as_no_longer_needed handle + + if + handle == handle2 + then + assert(true) + else + assert(false) + end + end end -terminate_this_custom_script diff --git a/tests/cleo_tests/cleo_tester.txt b/tests/cleo_tests/cleo_tester.txt new file mode 100644 index 00000000..29be0a8e --- /dev/null +++ b/tests/cleo_tests/cleo_tester.txt @@ -0,0 
+1,170 @@ + +const VAR_TEST_INDEX = 0 +const VAR_BEFORE_EACH = 1 +const VAR_AFTER_EACH = 2 +const VAR_SPEC = 3 +const VAR_ASSERT_INDEX = 4 + +jump @_cleo_tester_skip_fns + +function _cleo_tester_read_var(index: int): int + int buf = get_label_pointer @_cleo_tester_shared_vars + index *= 4 + int value = read_memory_with_offset {address} buf {offset} index {size} 4 + return true value +end + +function _cleo_tester_write_var(index: int, value: int) + int buf = get_label_pointer @_cleo_tester_shared_vars + index *= 4 + write_memory_with_offset {address} buf {offset} index {size} 4 {value} value +end + +/// registers new test suite (collection of unit tests) +/// use it(...) for individual unit tests +function test(suite_name: integer, callback: int) + debug_on + + int suite_name_buf = get_label_pointer @_cleo_tester_test_name + copy_memory {src} suite_name {dest} suite_name_buf {size} 255 // used in an it trace + trace "Testing %s" suite_name + + _cleo_tester_write_var(VAR_BEFORE_EACH, @_cleo_tester_stub) + _cleo_tester_write_var(VAR_AFTER_EACH, @_cleo_tester_stub) + _cleo_tester_write_var(VAR_TEST_INDEX, 1) + cleo_call callback +end + +/// registers new unit test in a test suite +/// use assert_*(...) 
to validate result +function it(spec_name: integer, callback: int) + define function run_spec + define function inject_offset(label: int, offset: int) + + int index = _cleo_tester_read_var(VAR_TEST_INDEX) + + int spec_name_buf = get_label_pointer @_cleo_tester_spec_name + copy_memory {src} spec_name {dest} spec_name_buf {size} 255 // used in a failed assert + int test_name = get_label_pointer @_cleo_tester_test_name + trace "Test #%d %s" index spec_name + + wait 0 + _cleo_tester_write_var(VAR_SPEC, callback) + _cleo_tester_write_var(VAR_ASSERT_INDEX, 0) + + run_spec + + trace "~g~~h~~h~Test #%d PASSED" index + index++ + _cleo_tester_write_var(VAR_TEST_INDEX, index) + + return true + + function run_spec + // this function should use 0 local variables + + inject_offset(@_cleo_tester_before_each, VAR_BEFORE_EACH) + inject_offset(@_cleo_tester_after_each, VAR_AFTER_EACH) + inject_offset(@_cleo_tester_run, VAR_SPEC) + + :_cleo_tester_before_each + gosub @_cleo_tester_stub + + :_cleo_tester_run + cleo_call @_cleo_tester_stub {numParams} 32 {params} 0@ 1@ 2@ 3@ 4@ 5@ 6@ 7@ 8@ 9@ 10@ 11@ 12@ 13@ 14@ 15@ 16@ 17@ 18@ 19@ 20@ 21@ 22@ 23@ 24@ 25@ 26@ 27@ 28@ 29@ 30@ 31@ + + :_cleo_tester_after_each + gosub @_cleo_tester_stub + end + + // self-patch script code with new gosub/call offset + function inject_offset(label: int, var_index: int) + int function_offset = _cleo_tester_read_var(var_index) + int location = get_label_pointer label + write_memory_with_offset {address} location {offset - opcode + datatype} 3 {size} 4 {value} function_offset + end +end + +:_cleo_tester_stub +return + +:_cleo_tester_shared_vars +hex + 00(20) // 5 variables +end +:_cleo_tester_test_name +hex + 00(256) +end +:_cleo_tester_spec_name +hex + 00(256) +end + +:_cleo_tester_fail +int test_index = _cleo_tester_read_var(VAR_TEST_INDEX) +int test_name = get_label_pointer @_cleo_tester_spec_name +int assert_index = _cleo_tester_read_var(VAR_ASSERT_INDEX) +breakpoint "~r~~h~~h~~h~Test #%d Assert #%d 
FAILED! %d Expected, %d Actual" test_index assert_index {val1} 0@ {val2} 1@ +terminate_this_custom_script + +function _cleo_tester_increment_assert + int index = _cleo_tester_read_var(VAR_ASSERT_INDEX) + index++ + _cleo_tester_write_var(VAR_ASSERT_INDEX, index) +end + +/// checks if two int values are equal, otherwise stops the test execution +function assert_eq(val1: int, val2: int) + _cleo_tester_increment_assert + val1 == val2 + jf @_cleo_tester_fail +end + +/// checks if two int values are not equal, otherwise stops the test execution +function assert_neq(val1: int, val2: int) + _cleo_tester_increment_assert + val1 <> val2 + jf @_cleo_tester_fail +end + +/// checks if two float values are equal, otherwise stops the test execution +function assert_eqf(val1:float, val2:float) + _cleo_tester_increment_assert + val1 == val2 + jf @_cleo_tester_fail +end + +/// checks if two float values are not equal, otherwise stops the test execution +function assert_neqf(val1:float, val2:float) + _cleo_tester_increment_assert + val1 <> val2 + jf @_cleo_tester_fail +end + +/// checks if value is a valid pointer, otherwise stops the test execution +function assert_ptr(ptr: int) + _cleo_tester_increment_assert + ptr > 0x10000 // possibly valid pointer + jf @_cleo_tester_fail +end + +/// checks if value is not 0, otherwise stops the test execution +function assert(flag: int) + _cleo_tester_increment_assert + flag <> False + jf @_cleo_tester_fail +end + +/// registers a callback that runs before each unit test (test setup) +function before_each(callback: int) + _cleo_tester_write_var(VAR_BEFORE_EACH, callback) +end + +/// registers a callback that runs after each unit test (test teardown) +function after_each(callback: int) + _cleo_tester_write_var(VAR_AFTER_EACH, callback) +end + + +:_cleo_tester_skip_fns \ No newline at end of file From 338f6f8cc53fcc1a9254aab1d663b4228c27506e Mon Sep 17 00:00:00 2001 From: Miran Date: Sun, 10 Mar 2024 00:23:00 +0100 Subject: [PATCH 118/216] Added 
argument names to OpcodeInfoDatabase --- cleo_sdk/CLEO.h | 1 + cleo_sdk/CLEO_Utils.h | 84 +++++++++++++++++++--------------- source/CCustomOpcodeSystem.cpp | 14 ++++++ source/CScriptEngine.cpp | 4 +- source/OpcodeInfoDatabase.cpp | 68 +++++++++++++++++++++++---- source/OpcodeInfoDatabase.h | 26 ++++++++++- source/cleo.def | 33 ++++++------- 7 files changed, 164 insertions(+), 66 deletions(-) diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index 625e11a2..cbe5c70e 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -468,6 +468,7 @@ void WINAPI CLEO_RegisterCallback(eCallbackId id, void* func); // script utils void WINAPI CLEO_GetScriptInfoStr(CRunningScript* thread, bool currLineInfo, char* buf, DWORD bufSize); // short text for displaying in error\log messages +void WINAPI CLEO_GetScriptParamInfoStr(int idexOffset, char* buf, DWORD bufSize); // short text with current+offset opcode parameter info (index and name if available) eCLEO_Version WINAPI CLEO_GetScriptVersion(const CRunningScript* thread); // compatibility mode LPCSTR WINAPI CLEO_GetScriptFilename(const CRunningScript* thread); // returns nullptr if provided script ptr is not valid diff --git a/cleo_sdk/CLEO_Utils.h b/cleo_sdk/CLEO_Utils.h index d5751152..b55413aa 100644 --- a/cleo_sdk/CLEO_Utils.h +++ b/cleo_sdk/CLEO_Utils.h @@ -198,6 +198,14 @@ namespace CLEO } } + static std::string GetParamInfo(int offset = 0) + { + std::string info; + info.resize(32); + CLEO_GetScriptParamInfoStr(offset, info.data(), info.length()); + return info; + } + #define TRACE(format,...) {CLEO::Trace(CLEO::eLogLevel::Default, format, __VA_ARGS__);} #define LOG_WARNING(script, format, ...) {CLEO::Trace(script, CLEO::eLogLevel::Error, format, __VA_ARGS__);} #define SHOW_ERROR(a,...) 
{CLEO::ShowError(a, __VA_ARGS__);} @@ -313,7 +321,7 @@ namespace CLEO if (!_paramWasString()) { - SHOW_ERROR("Input argument #%d expected to be string, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), ToKindStr(_lastParamType, _lastParamArrayType), ScriptInfoStr(thread).c_str()); + SHOW_ERROR("Input argument %s expected to be string, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), ToKindStr(_lastParamType, _lastParamArrayType), ScriptInfoStr(thread).c_str()); thread->Suspend(); _lastParamType = DT_INVALID; // mark error return nullptr; @@ -323,7 +331,7 @@ namespace CLEO if (str == nullptr) // other error? { - SHOW_ERROR("Invalid input argument #%d in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), ScriptInfoStr(thread).c_str()); + SHOW_ERROR("Invalid input argument %s in script %s\nScript suspended.", GetParamInfo().c_str(), ScriptInfoStr(thread).c_str()); thread->Suspend(); _lastParamType = DT_INVALID; // mark error return nullptr; @@ -339,14 +347,14 @@ namespace CLEO if (str != nullptr && (size_t)str <= MinValidAddress) { - SHOW_ERROR("Invalid '0x%X' source pointer of output string argument #%d in script %s \nScript suspended.", str, CLEO_GetParamsHandledCount() + 1, ScriptInfoStr(thread).c_str()); + SHOW_ERROR("Invalid '0x%X' source pointer of output string argument %s in script %s \nScript suspended.", str, GetParamInfo(1).c_str(), ScriptInfoStr(thread).c_str()); thread->Suspend(); return false; } if (!_paramWasString(true)) { - SHOW_ERROR("Output argument #%d expected to be variable string, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount() + 1, ToKindStr(_lastParamType, _lastParamArrayType), ScriptInfoStr(thread).c_str()); + SHOW_ERROR("Output argument %s expected to be variable string, got %s in script %s\nScript suspended.", GetParamInfo(1).c_str(), ToKindStr(_lastParamType, _lastParamArrayType), ScriptInfoStr(thread).c_str()); thread->Suspend(); return false; } @@ -357,7 +365,7 @@ 
namespace CLEO if ((size_t)ptr <= MinValidAddress) { - SHOW_ERROR("Invalid '0x%X' pointer of output string argument #%d in script %s \nScript suspended.", ptr, CLEO_GetParamsHandledCount() + 1, ScriptInfoStr(thread).c_str()); + SHOW_ERROR("Invalid '0x%X' pointer of output string argument %s in script %s \nScript suspended.", ptr, GetParamInfo(1).c_str(), ScriptInfoStr(thread).c_str()); thread->Suspend(); return false; } @@ -370,7 +378,7 @@ namespace CLEO if (buff == nullptr) // all error types already handled, but check just in case { - SHOW_ERROR("Invalid output argument #%d in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), ScriptInfoStr(thread).c_str()); + SHOW_ERROR("Invalid output argument %s in script %s\nScript suspended.", GetParamInfo().c_str(), ScriptInfoStr(thread).c_str()); thread->Suspend(); return false; } @@ -398,31 +406,31 @@ namespace CLEO // TOD: add range checks for limited size types? #define OPCODE_READ_PARAM_BOOL() _readParam(thread).bParam; \ - if (!_paramWasInt()) { SHOW_ERROR("Input argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasInt()) { SHOW_ERROR("Input argument %s expected to be integer, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_READ_PARAM_INT8() _readParam(thread).cParam; \ - if (!_paramWasInt()) { SHOW_ERROR("Input argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasInt()) { SHOW_ERROR("Input argument %s expected to be integer, got %s in script %s\nScript suspended.", 
GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_READ_PARAM_UINT8() _readParam(thread).ucParam; \ - if (!_paramWasInt()) { SHOW_ERROR("Input argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasInt()) { SHOW_ERROR("Input argument %s expected to be integer, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_READ_PARAM_INT16() _readParam(thread).wParam; \ - if (!_paramWasInt()) { SHOW_ERROR("Input argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasInt()) { SHOW_ERROR("Input argument %s expected to be integer, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_READ_PARAM_UINT16() _readParam(thread).usParam; \ - if (!_paramWasInt()) { SHOW_ERROR("Input argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasInt()) { SHOW_ERROR("Input argument %s expected to be integer, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_READ_PARAM_INT() _readParam(thread).nParam; 
\ - if (!_paramWasInt()) { SHOW_ERROR("Input argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasInt()) { SHOW_ERROR("Input argument %s expected to be integer, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_READ_PARAM_UINT() _readParam(thread).dwParam; \ - if (!_paramWasInt()) { SHOW_ERROR("Input argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasInt()) { SHOW_ERROR("Input argument %s expected to be integer, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_READ_PARAM_FLOAT() _readParamFloat(thread).fParam; \ - if (!IsLegacyScript(thread) && !_paramWasFloat()) { SHOW_ERROR("Input argument #%d expected to be float, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!IsLegacyScript(thread) && !_paramWasFloat()) { SHOW_ERROR("Input argument %s expected to be float, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_READ_PARAM_ANY32() _readParam(thread).dwParam; \ - if (!_paramWasInt() && !_paramWasFloat()) { SHOW_ERROR("Input argument #%d expected to be int or float, got %s in script %s\nScript 
suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasInt() && !_paramWasFloat()) { SHOW_ERROR("Input argument %s expected to be int or float, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_READ_PARAM_STRING(_varName) char _buff_##_varName[MAX_STR_LEN + 1]; const char* ##_varName = _readParamText(thread, _buff_##_varName, MAX_STR_LEN + 1); if(!_paramWasString()) { return OpcodeResult::OR_INTERRUPT; } @@ -431,63 +439,63 @@ namespace CLEO #define OPCODE_READ_PARAM_FILEPATH(_varName) char _buff_##_varName[512]; const char* ##_varName = _readParamText(thread, _buff_##_varName, 512); if(##_varName != nullptr) ##_varName = _buff_##_varName; if(_paramWasString()) CLEO_ResolvePath(thread, _buff_##_varName, 512); else return OpcodeResult::OR_INTERRUPT; #define OPCODE_READ_PARAM_PTR() _readParam(thread).pParam; \ - if (!_paramWasInt()) { SHOW_ERROR("Input argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ - else if (_paramsArray[0].dwParam <= MinValidAddress) { SHOW_ERROR("Invalid pointer '0x%X' input argument #%d in script %s \nScript suspended.", _paramsArray[0].dwParam, CLEO_GetParamsHandledCount(), ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasInt()) { SHOW_ERROR("Input argument %s expected to be integer, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ + else if (_paramsArray[0].dwParam <= MinValidAddress) { SHOW_ERROR("Invalid pointer '0x%X' input argument 
%s in script %s \nScript suspended.", _paramsArray[0].dwParam, GetParamInfo().c_str(), ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_READ_PARAM_OBJECT_HANDLE() _readParam(thread).dwParam; \ - if (!_paramWasInt()) { SHOW_ERROR("Input argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ - else if (!IsObjectHandleValid(_paramsArray[0].dwParam)) { SHOW_ERROR("Invalid object handle '0x%X' input argument #%d in script %s \nScript suspended.", _paramsArray[0].dwParam, CLEO_GetParamsHandledCount(), ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasInt()) { SHOW_ERROR("Input argument %s expected to be integer, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ + else if (!IsObjectHandleValid(_paramsArray[0].dwParam)) { SHOW_ERROR("Invalid object handle '0x%X' input argument %s in script %s \nScript suspended.", _paramsArray[0].dwParam, GetParamInfo().c_str(), ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_READ_PARAM_PED_HANDLE() _readParam(thread).dwParam; \ - if (!_paramWasInt()) { SHOW_ERROR("Input argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ - else if (!IsPedHandleValid(_paramsArray[0].dwParam)) { SHOW_ERROR("Invalid character handle '0x%X' input argument #%d in script %s \nScript suspended.", _paramsArray[0].dwParam, CLEO_GetParamsHandledCount(), ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasInt()) { SHOW_ERROR("Input argument %s expected to be integer, got %s in script 
%s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ + else if (!IsPedHandleValid(_paramsArray[0].dwParam)) { SHOW_ERROR("Invalid character handle '0x%X' input argument %s in script %s \nScript suspended.", _paramsArray[0].dwParam, GetParamInfo().c_str(), ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_READ_PARAM_VEHICLE_HANDLE() _readParam(thread).dwParam; \ - if (!_paramWasInt()) { SHOW_ERROR("Input argument #%d expected to be integer, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ - else if (!IsVehicleHandleValid(_paramsArray[0].dwParam)) { SHOW_ERROR("Invalid vehicle handle '0x%X' input argument #%d in script %s \nScript suspended.", _paramsArray[0].dwParam, CLEO_GetParamsHandledCount(), ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasInt()) { SHOW_ERROR("Input argument %s expected to be integer, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ + else if (!IsVehicleHandleValid(_paramsArray[0].dwParam)) { SHOW_ERROR("Invalid vehicle handle '0x%X' input argument %s in script %s \nScript suspended.", _paramsArray[0].dwParam, GetParamInfo().c_str(), ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_READ_PARAM_OUTPUT_VAR_ANY32() _readParamVariable(thread); \ - if (!_paramWasVariable()) { SHOW_ERROR("Output argument #%d expected to be variable int or float, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasVariable()) { SHOW_ERROR("Output 
argument %s expected to be variable int or float, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_READ_PARAM_OUTPUT_VAR_INT() (int*)_readParamVariable(thread); \ - if (!_paramWasVariable()) { SHOW_ERROR("Output argument #%d expected to be variable int, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ - if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be variable int, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasVariable()) { SHOW_ERROR("Output argument %s expected to be variable int, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ + if (!_paramWasInt(true)) { SHOW_ERROR("Output argument %s expected to be variable int, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_READ_PARAM_OUTPUT_VAR_FLOAT() (float*)_readParamVariable(thread); \ - if (!_paramWasVariable()) { SHOW_ERROR("Output argument #%d expected to be variable float, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ - if (!IsLegacyScript(thread) && !_paramWasFloat(true)) { SHOW_ERROR("Output argument #%d expected to be variable float, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), 
CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasVariable()) { SHOW_ERROR("Output argument %s expected to be variable float, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ + if (!IsLegacyScript(thread) && !_paramWasFloat(true)) { SHOW_ERROR("Output argument %s expected to be variable float, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } // macros for writing opcode output params. Performs type validation, throws error and suspends script if user provided invalid argument type #define OPCODE_WRITE_PARAM_BOOL(value) _writeParam(thread, value); \ - if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be variable int, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasInt(true)) { SHOW_ERROR("Output argument %s expected to be variable int, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_WRITE_PARAM_INT8(value) _writeParam(thread, value); \ - if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be variable int, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasInt(true)) { SHOW_ERROR("Output argument %s expected to be variable int, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), 
CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_WRITE_PARAM_UINT8(value) _writeParam(thread, value); \ - if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be variable int, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasInt(true)) { SHOW_ERROR("Output argument %s expected to be variable int, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_WRITE_PARAM_INT16(value) _writeParam(thread, value); \ - if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be variable int, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasInt(true)) { SHOW_ERROR("Output argument %s expected to be variable int, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_WRITE_PARAM_UINT16(value) _writeParam(thread, value); \ - if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be variable int, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasInt(true)) { SHOW_ERROR("Output argument %s expected to be variable int, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } 
#define OPCODE_WRITE_PARAM_INT(value) _writeParam(thread, value); \ - if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be variable int, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasInt(true)) { SHOW_ERROR("Output argument %s expected to be variable int, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_WRITE_PARAM_UINT(value) _writeParam(thread, value); \ - if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be variable int, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasInt(true)) { SHOW_ERROR("Output argument %s expected to be variable int, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_WRITE_PARAM_ANY32(value) _writeParam(thread, value); \ - if (!_paramWasInt(true) && !_paramWasFloat(true)) { SHOW_ERROR("Output argument #%d expected to be int or float variable, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasInt(true) && !_paramWasFloat(true)) { SHOW_ERROR("Output argument %s expected to be int or float variable, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_WRITE_PARAM_FLOAT(value) _writeParam(thread, 
value); \ - if (!IsLegacyScript(thread) && !_paramWasFloat(true)) { SHOW_ERROR("Output argument #%d expected to be variable float, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!IsLegacyScript(thread) && !_paramWasFloat(true)) { SHOW_ERROR("Output argument %s expected to be variable float, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_WRITE_PARAM_STRING(value) if(!_writeParamText(thread, value)) { return OpcodeResult::OR_INTERRUPT; } #define OPCODE_WRITE_PARAM_PTR(value) _writeParamPtr(thread, (void*)value); \ - if (!_paramWasInt(true)) { SHOW_ERROR("Output argument #%d expected to be variable int, got %s in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!_paramWasInt(true)) { SHOW_ERROR("Output argument %s expected to be variable int, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } } diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 37a38227..e248fc48 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -2166,4 +2166,18 @@ extern "C" std::memcpy(buf, text.c_str(), text.length() + 1); // with terminator } + + void WINAPI CLEO_GetScriptParamInfoStr(int idexOffset, char* buf, DWORD bufSize) + { + auto curr = idexOffset - 1 + GetInstance().OpcodeSystem.handledParamCount; + auto name = GetInstance().OpcodeInfoDb.GetArgumentName(GetInstance().OpcodeSystem.lastOpcode, curr); + + curr++; // 1-based argument index display + + std::string 
msg; + if (name != nullptr) msg = StringPrintf("#%d \"%s\"", curr, name); + else msg = StringPrintf("#%d", curr); + + strncpy(buf, msg.c_str(), bufSize); + } } diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 1c1cff1f..f7571ae0 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -149,7 +149,7 @@ namespace CLEO if (opcodeParams[0].dwParam <= CCustomOpcodeSystem::MinValidAddress) { - LOG_WARNING(thread, "Invalid '0x%X' pointer of input string argument #%d in script %s", opcodeParams[0].dwParam, CLEO_GetParamsHandledCount(), ScriptInfoStr(thread).c_str()); + LOG_WARNING(thread, "Invalid '0x%X' pointer of input string argument %s in script %s", opcodeParams[0].dwParam, GetParamInfo().c_str(), ScriptInfoStr(thread).c_str()); return nullptr; // error } @@ -228,7 +228,7 @@ namespace CLEO } // unsupported param type - LOG_WARNING(thread, "Argument #%d expected to be string, got %s in script %s", CLEO_GetParamsHandledCount(), ToKindStr(paramType, arrayType), ScriptInfoStr(thread).c_str()); + LOG_WARNING(thread, "Argument %s expected to be string, got %s in script %s", GetParamInfo().c_str(), ToKindStr(paramType, arrayType), ScriptInfoStr(thread).c_str()); CLEO_SkipOpcodeParams(thread, 1); // try skip unhandled param return nullptr; // error } diff --git a/source/OpcodeInfoDatabase.cpp b/source/OpcodeInfoDatabase.cpp index 5e96bfbc..3c415719 100644 --- a/source/OpcodeInfoDatabase.cpp +++ b/source/OpcodeInfoDatabase.cpp @@ -2,6 +2,7 @@ #include "OpcodeInfoDatabase.h" #include "json.hpp" #include +#include using namespace std; using namespace json; @@ -33,7 +34,7 @@ bool OpcodeInfoDatabase::Load(const char* filepath) return false; } - std::string text; + string text; text.resize((size_t)size); file.read(text.data(), size); file.close(); @@ -49,7 +50,7 @@ bool OpcodeInfoDatabase::Load(const char* filepath) { root = JSON::Load(text.c_str()); } - catch (const std::exception& ex) + catch (const exception& ex) { TRACE("Error while parsing 
opcodes database '%s' file:\n%s", filepath, ex.what()); return false; @@ -82,18 +83,45 @@ bool OpcodeInfoDatabase::Load(const char* filepath) continue; // invalid command } - auto id = std::stoul(commandId.ToString(), nullptr, 16); - if (id > 0x7FFF) + auto idLong = stoul(commandId.ToString(), nullptr, 16); + if (idLong > 0x7FFF) { continue; // opcode out of bounds } + auto id = (uint16_t)idLong; - extension.opcodes[(uint16_t)id] = commandName.ToString(); + extension.opcodes.emplace(piecewise_construct, make_tuple(id), make_tuple(id, commandName.ToString())); + auto& opcode = extension.opcodes.at(id); + + // read arguments info + auto inputArgs = c["input"]; + if (inputArgs.JSONType() == JSON::Class::Array) + { + for (auto& p : inputArgs.ArrayRange()) + { + if(p.JSONType() == JSON::Class::Object && p["name"].JSONType() == JSON::Class::String) + { + opcode.arguments.emplace_back(p["name"].ToString().c_str()); + } + } + } + + auto outputArgs = c["output"]; + if (outputArgs.JSONType() == JSON::Class::Array) + { + for (auto& p : outputArgs.ArrayRange()) + { + if (p.JSONType() == JSON::Class::Object && p["name"].JSONType() == JSON::Class::String) + { + opcode.arguments.emplace_back(p["name"].ToString().c_str()); + } + } + } } if (!extension.opcodes.empty()) { - extensions[extension.name] = std::move(extension); + extensions[extension.name] = move(extension); } } @@ -131,7 +159,7 @@ const char* OpcodeInfoDatabase::GetExtensionName(const char* commandName) const for (auto& opcode : opcodes) { - if (_strcmpi(commandName, opcode.second.c_str()) == 0) + if (_strcmpi(commandName, opcode.second.name.c_str()) == 0) { return extension.name.c_str(); } @@ -153,7 +181,7 @@ uint16_t OpcodeInfoDatabase::GetOpcode(const char* commandName) const for (auto& opcode : opcodes) { - if (_strcmpi(commandName, opcode.second.c_str()) == 0) + if (_strcmpi(commandName, opcode.second.name.c_str()) == 0) { return opcode.first; } @@ -174,7 +202,28 @@ const char* 
OpcodeInfoDatabase::GetCommandName(uint16_t opcode) const if (opcodes.find(opcode) != opcodes.end()) { - return opcodes.at(opcode).c_str(); + return opcodes.at(opcode).name.c_str(); + } + } + } + + return nullptr; +} + +const char* OpcodeInfoDatabase::GetArgumentName(uint16_t opcode, size_t paramIdx) const +{ + if (ok) + { + for (auto& entry : extensions) + { + auto& opcodes = entry.second.opcodes; + + if (opcodes.find(opcode) != opcodes.end()) + { + if(paramIdx < opcodes.at(opcode).arguments.size()) + { + return opcodes.at(opcode).arguments[paramIdx].name.c_str(); + } } } } @@ -192,3 +241,4 @@ std::string OpcodeInfoDatabase::GetExtensionMissingMessage(uint16_t opcode) cons return CLEO::StringPrintf("CLEO extension plugin \"%s\" is missing!", extensionName); } + diff --git a/source/OpcodeInfoDatabase.h b/source/OpcodeInfoDatabase.h index c849f9bb..58c86534 100644 --- a/source/OpcodeInfoDatabase.h +++ b/source/OpcodeInfoDatabase.h @@ -5,10 +5,32 @@ class OpcodeInfoDatabase { + struct OpcodeArgument + { + std::string name; + + OpcodeArgument() = default; + OpcodeArgument(const char* name) : name(name) + { + } + }; + + struct Opcode + { + uint16_t id; + std::string name; + std::vector arguments; + + Opcode() = default; + Opcode(uint16_t id, std::string name) : id(id), name(name) + { + } + }; + struct Extension { std::string name; - std::map opcodes; + std::map opcodes; }; bool ok = false; @@ -26,6 +48,8 @@ class OpcodeInfoDatabase uint16_t GetOpcode(const char* commandName) const; // 0xFFFF if not found const char* GetCommandName(uint16_t opcode) const; // nullptr if not found + const char* GetArgumentName(uint16_t opcode, size_t paramIdx) const; // nullptr if not found + std::string GetExtensionMissingMessage(uint16_t opcode) const; // extension "x" missing message if known, empty text otherwise }; diff --git a/source/cleo.def b/source/cleo.def index 9e018aa6..a289abb9 100644 --- a/source/cleo.def +++ b/source/cleo.def @@ -33,19 +33,20 @@ EXPORTS 
_CLEO_ReadParamsFormatted@16 @30 _CLEO_GetScriptVersion@4 @31 _CLEO_GetScriptInfoStr@16 @32 - _CLEO_ResolvePath@12 @33 - _CLEO_GetScriptDebugMode@4 @34 - _CLEO_SetScriptDebugMode@8 @35 - _CLEO_Log@8 @36 - _CLEO_ReadStringParamWriteBuffer@16 @37 - _CLEO_GetOpcodeParamsArray@0 @38 - _CLEO_GetParamsHandledCount@0 @39 - _CLEO_PeekIntOpcodeParam@4 @40 - _CLEO_PeekFloatOpcodeParam@4 @41 - _CLEO_PeekPointerToScriptVariable@4 @42 - _CLEO_GetScriptByName@16 @43 - _CLEO_GetScriptByFilename@8 @44 - _CLEO_GetScriptFilename@4 @45 - _CLEO_GetScriptWorkDir@4 @46 - _CLEO_SetScriptWorkDir@8 @47 - _CLEO_RegisterCommand@8 @48 + _CLEO_GetScriptParamInfoStr@12 @33 + _CLEO_ResolvePath@12 @34 + _CLEO_GetScriptDebugMode@4 @35 + _CLEO_SetScriptDebugMode@8 @36 + _CLEO_Log@8 @37 + _CLEO_ReadStringParamWriteBuffer@16 @38 + _CLEO_GetOpcodeParamsArray@0 @39 + _CLEO_GetParamsHandledCount@0 @40 + _CLEO_PeekIntOpcodeParam@4 @41 + _CLEO_PeekFloatOpcodeParam@4 @42 + _CLEO_PeekPointerToScriptVariable@4 @43 + _CLEO_GetScriptByName@16 @44 + _CLEO_GetScriptByFilename@8 @45 + _CLEO_GetScriptFilename@4 @46 + _CLEO_GetScriptWorkDir@4 @47 + _CLEO_SetScriptWorkDir@8 @48 + _CLEO_RegisterCommand@8 @49 From cf7c5ab4406e016300cf4433a8cf410db082cf7a Mon Sep 17 00:00:00 2001 From: Miran Date: Sun, 10 Mar 2024 02:21:25 +0100 Subject: [PATCH 119/216] Loading opcodes database in separated thread. 
--- source/OpcodeInfoDatabase.cpp | 34 ++++++++++++++++++++-------------- source/OpcodeInfoDatabase.h | 9 ++++++--- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/source/OpcodeInfoDatabase.cpp b/source/OpcodeInfoDatabase.cpp index 3c415719..ee42d77a 100644 --- a/source/OpcodeInfoDatabase.cpp +++ b/source/OpcodeInfoDatabase.cpp @@ -3,24 +3,19 @@ #include "json.hpp" #include #include +#include using namespace std; using namespace json; -void OpcodeInfoDatabase::Clear() -{ - ok = false; - extensions.clear(); -} - -bool OpcodeInfoDatabase::Load(const char* filepath) +bool OpcodeInfoDatabase::_Load(const std::string filepath) { Clear(); - ifstream file(filepath); + ifstream file(filepath.c_str()); if (file.fail()) { - TRACE("Failed to open opcodes database '%s' file.", filepath); + TRACE("Failed to open opcodes database '%s' file.", filepath.c_str()); return false; } @@ -30,7 +25,7 @@ bool OpcodeInfoDatabase::Load(const char* filepath) if (size > 8 * 1024 * 1024) // 8MB is reasonable json file size upper limit { - TRACE("Opcodes database '%s' file too large to load.", filepath); + TRACE("Opcodes database '%s' file too large to load.", filepath.c_str()); return false; } @@ -41,7 +36,7 @@ bool OpcodeInfoDatabase::Load(const char* filepath) if (file.fail()) { - TRACE("Error while reading opcodes database '%s' file.", filepath); + TRACE("Error while reading opcodes database '%s' file.", filepath.c_str()); return false; } @@ -52,13 +47,13 @@ bool OpcodeInfoDatabase::Load(const char* filepath) } catch (const exception& ex) { - TRACE("Error while parsing opcodes database '%s' file:\n%s", filepath, ex.what()); + TRACE("Error while parsing opcodes database '%s' file:\n%s", filepath.c_str(), ex.what()); return false; } if (root.IsNull() || root["extensions"].JSONType() != JSON::Class::Array) { - TRACE("Invalid opcodes database '%s' file.", filepath); + TRACE("Invalid opcodes database '%s' file.", filepath.c_str()); return false; } @@ -99,7 +94,7 @@ bool 
OpcodeInfoDatabase::Load(const char* filepath) { for (auto& p : inputArgs.ArrayRange()) { - if(p.JSONType() == JSON::Class::Object && p["name"].JSONType() == JSON::Class::String) + if (p.JSONType() == JSON::Class::Object && p["name"].JSONType() == JSON::Class::String) { opcode.arguments.emplace_back(p["name"].ToString().c_str()); } @@ -129,6 +124,17 @@ bool OpcodeInfoDatabase::Load(const char* filepath) return true; } +void OpcodeInfoDatabase::Clear() +{ + ok = false; + extensions.clear(); +} + +void OpcodeInfoDatabase::Load(const char* filepath) +{ + thread(&OpcodeInfoDatabase::_Load, this, std::string(filepath)).detach(); // asynchronic execute +} + const char* OpcodeInfoDatabase::GetExtensionName(uint16_t opcode) const { if (ok) diff --git a/source/OpcodeInfoDatabase.h b/source/OpcodeInfoDatabase.h index 58c86534..663e2e14 100644 --- a/source/OpcodeInfoDatabase.h +++ b/source/OpcodeInfoDatabase.h @@ -1,6 +1,7 @@ #pragma once -#include +#include #include +#include class OpcodeInfoDatabase @@ -33,14 +34,16 @@ class OpcodeInfoDatabase std::map opcodes; }; - bool ok = false; + std::atomic ok = false; std::map extensions; + bool _Load(const std::string filepath); + public: OpcodeInfoDatabase() = default; void Clear(); - bool Load(const char* filepath); + void Load(const char* filepath); // triggers asynchronic load const char* GetExtensionName(uint16_t opcode) const; // nullptr if not found const char* GetExtensionName(const char* commandName) const; // nullptr if not found From bae44bab9dcd0f4921434f06917e97cfffc82807 Mon Sep 17 00:00:00 2001 From: Miran Date: Sun, 10 Mar 2024 23:24:23 +0100 Subject: [PATCH 120/216] Fix of text mode support for file streams. 
--- cleo_plugins/FileSystemOperations/FileUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cleo_plugins/FileSystemOperations/FileUtils.cpp b/cleo_plugins/FileSystemOperations/FileUtils.cpp index 270fc130..644f6336 100644 --- a/cleo_plugins/FileSystemOperations/FileUtils.cpp +++ b/cleo_plugins/FileSystemOperations/FileUtils.cpp @@ -115,7 +115,7 @@ DWORD File::open(const char* filename, const char* mode, bool legacy) // By default open as binary mode. // Generally text mode is not well documented in C and many file related functions has undefined behavior. For example 'ftell' returns invalid values. - if (valid && !binary) + if (valid && (!binary && !text)) { strcpy(modeUpdated, mode); strcat(modeUpdated, "b"); From 63e98d6b87d8d39bb6e64f90f6bfc08619640b68 Mon Sep 17 00:00:00 2001 From: Miran Date: Thu, 7 Mar 2024 02:18:53 +0100 Subject: [PATCH 121/216] SCM function handling by GOSUB return. --- source/CCustomOpcodeSystem.cpp | 20 ++++++++++++++++++++ source/CCustomOpcodeSystem.h | 2 ++ 2 files changed, 22 insertions(+) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index e248fc48..2070d03d 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -21,6 +21,8 @@ namespace CLEO template inline CRunningScript& operator>>(CRunningScript& thread, memory_pointer& pval); + OpcodeResult __stdcall opcode_0051(CRunningScript * thread); // GOSUB return + OpcodeResult __stdcall opcode_0A92(CRunningScript *thread); OpcodeResult __stdcall opcode_0A93(CRunningScript *thread); OpcodeResult __stdcall opcode_0A94(CRunningScript *thread); @@ -225,6 +227,8 @@ namespace CLEO CCustomOpcodeSystem::CCustomOpcodeSystem() { TRACE("Initializing CLEO core opcodes..."); + + CLEO_RegisterOpcode(0x0051, opcode_0051); CLEO_RegisterOpcode(0x0A92, opcode_0A92); CLEO_RegisterOpcode(0x0A93, opcode_0A93); CLEO_RegisterOpcode(0x0A94, opcode_0A94); @@ -850,6 +854,22 @@ namespace CLEO /* Opcode definitions */ 
/************************************************************************/ + OpcodeResult __stdcall CCustomOpcodeSystem::opcode_0051(CRunningScript* thread) // GOSUB return + { + if (thread->SP == 0 && thread->IsCustom() && !IsLegacyScript(thread)) // CLEO5 - allow use of GOSUB `return` to exit cleo calls too + { + return opcode_0AB2(thread); // try CLEO's function return + } + + if (thread->SP == 0) + { + SHOW_ERROR("`return` used without preceding `gosub` call in script %s\nScript suspended.", ((CCustomScript*)thread)->GetInfoStr().c_str()); + return thread->Suspend(); + } + + return originalOpcodeHandlers[0x0051](thread, 0x0051); // call game's original + } + //0A92=-1,create_custom_thread %1d% OpcodeResult __stdcall opcode_0A92(CRunningScript *thread) { diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index 5dc984b7..bdf098e2 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -44,6 +44,8 @@ namespace CLEO static OpcodeResult CleoReturnGeneric(WORD opcode, CRunningScript* thread, bool returnArgs = false, DWORD returnArgCount = 0, bool strictArgCount = true); + static OpcodeResult __stdcall opcode_0051(CRunningScript* thread); // GOSUB's return + private: typedef OpcodeResult(__thiscall* _OpcodeHandler)(CRunningScript* thread, WORD opcode); From dfb5bca64c3c5c26cdecca08691476b071d164c3 Mon Sep 17 00:00:00 2001 From: Miran Date: Thu, 7 Mar 2024 08:25:39 +0100 Subject: [PATCH 122/216] Review fix. 
--- source/CCustomOpcodeSystem.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 2070d03d..e9278903 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -858,7 +858,7 @@ namespace CLEO { if (thread->SP == 0 && thread->IsCustom() && !IsLegacyScript(thread)) // CLEO5 - allow use of GOSUB `return` to exit cleo calls too { - return opcode_0AB2(thread); // try CLEO's function return + return GetInstance().OpcodeSystem.CleoReturnGeneric(0x0051, thread, false); // try CLEO's function return } if (thread->SP == 0) From cfbd9bf1e99ebf92e043c0d4f8627dd4872d1f97 Mon Sep 17 00:00:00 2001 From: Miran Date: Thu, 7 Mar 2024 09:06:38 +0100 Subject: [PATCH 123/216] fixup! Review fix. --- source/CCustomOpcodeSystem.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index e9278903..43e4a7bf 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -867,7 +867,8 @@ namespace CLEO return thread->Suspend(); } - return originalOpcodeHandlers[0x0051](thread, 0x0051); // call game's original + size_t tableIdx = 0x0051 / 100; // 100 opcodes peer handler table + return originalOpcodeHandlers[tableIdx](thread, 0x0051); // call game's original } //0A92=-1,create_custom_thread %1d% From a4a64b6d0669becd991931adc888ffb498e8d8b3 Mon Sep 17 00:00:00 2001 From: Seemann Date: Sun, 10 Mar 2024 18:32:55 -0400 Subject: [PATCH 124/216] update tests --- tests/cleo_tests/.Compile_All.bat | 8 +++++--- tests/cleo_tests/FilesystemOperations/0A9A.txt | 3 +-- tests/cleo_tests/MemoryOperations/0A8C.txt | 5 ++--- tests/cleo_tests/MemoryOperations/0A8D.txt | 4 ++-- tests/cleo_tests/MemoryOperations/0A96.txt | 3 +-- tests/cleo_tests/MemoryOperations/0A97.txt | 2 +- tests/cleo_tests/MemoryOperations/0A98.txt | 2 +- tests/cleo_tests/MemoryOperations/0AA4.txt | 2 +- 
tests/cleo_tests/MemoryOperations/0AC6.txt | 2 +- tests/cleo_tests/MemoryOperations/0AC7.txt | 2 +- tests/cleo_tests/MemoryOperations/0AC8.txt | 7 +++---- tests/cleo_tests/MemoryOperations/0AC9.txt | 2 +- tests/cleo_tests/MemoryOperations/0AE9.txt | 2 +- tests/cleo_tests/MemoryOperations/0AEA.txt | 2 +- tests/cleo_tests/MemoryOperations/0AEB.txt | 2 +- tests/cleo_tests/MemoryOperations/0AEC.txt | 2 +- tests/cleo_tests/cleo_tester.txt | 2 +- 17 files changed, 25 insertions(+), 27 deletions(-) diff --git a/tests/cleo_tests/.Compile_All.bat b/tests/cleo_tests/.Compile_All.bat index 31766810..de4ec11c 100644 --- a/tests/cleo_tests/.Compile_All.bat +++ b/tests/cleo_tests/.Compile_All.bat @@ -13,9 +13,11 @@ for /f "delims=" %%i in ('dir /b /s *.s') do ( @REM Compile all .txt files in the current directory and subdirectories for /f "delims=" %%i in ('dir /b /s *.txt') do ( - set p=%%i - echo Compiling !p:%__CD__%=!... - %SANNY% --compile "%%i" "%%~dpni.s" --no-splash --mode sa_sbl + if not "%%~nxi" == "cleo_tester.txt" ( + set p=%%i + echo Compiling !p:%__CD__%=!... + %SANNY% --compile "%%i" "%%~dpni.s" --no-splash --mode sa_sbl + ) ) echo Done. 
diff --git a/tests/cleo_tests/FilesystemOperations/0A9A.txt b/tests/cleo_tests/FilesystemOperations/0A9A.txt index 9e1c91f2..0518c8de 100644 --- a/tests/cleo_tests/FilesystemOperations/0A9A.txt +++ b/tests/cleo_tests/FilesystemOperations/0A9A.txt @@ -12,8 +12,7 @@ function tests it("should fail on a non-existing file", @test1) it("should open existing file", @test2) - - return true + return function test1 if diff --git a/tests/cleo_tests/MemoryOperations/0A8C.txt b/tests/cleo_tests/MemoryOperations/0A8C.txt index 3d78f687..eab94080 100644 --- a/tests/cleo_tests/MemoryOperations/0A8C.txt +++ b/tests/cleo_tests/MemoryOperations/0A8C.txt @@ -17,15 +17,14 @@ function tests it("should write 5 bytes", @test6) it("should write 7 bytes", @test7) it("should write float", @test8) - - return true + return :before get_var_pointer 2@ {store_to} 0@ 1@ = 0xcccccccc 2@ = 0xdddddddd 3@ = 0xeeeeeeee - 0051: return + return function test1 write_memory {address} 0@ {size} 0 {value} 0x11223344 {vp} false diff --git a/tests/cleo_tests/MemoryOperations/0A8D.txt b/tests/cleo_tests/MemoryOperations/0A8D.txt index 4bca250f..1bc929b5 100644 --- a/tests/cleo_tests/MemoryOperations/0A8D.txt +++ b/tests/cleo_tests/MemoryOperations/0A8D.txt @@ -15,14 +15,14 @@ function tests it("should read 4 bytes", @test5) it("should read float", @test6) - return true + return :prepare_tests 0@ = get_label_pointer @DATA 1@ = 0xcccccccc 2@ = 0xdddddddd 3@ = 0xeeeeeeee - 0051: return // sanny does not allow single `return` command in function context + return function test1 2@ = read_memory {address} 0@ {size} 0 {vp} false diff --git a/tests/cleo_tests/MemoryOperations/0A96.txt b/tests/cleo_tests/MemoryOperations/0A96.txt index 4fb436b1..dc5c20bb 100644 --- a/tests/cleo_tests/MemoryOperations/0A96.txt +++ b/tests/cleo_tests/MemoryOperations/0A96.txt @@ -8,8 +8,7 @@ terminate_this_custom_script function tests it("should return valid pointer", @test1) - - return true + return function test1 int handle = 
get_player_char 0 diff --git a/tests/cleo_tests/MemoryOperations/0A97.txt b/tests/cleo_tests/MemoryOperations/0A97.txt index 034ad422..1717e8d6 100644 --- a/tests/cleo_tests/MemoryOperations/0A97.txt +++ b/tests/cleo_tests/MemoryOperations/0A97.txt @@ -8,7 +8,7 @@ terminate_this_custom_script function tests it("should return a valid pointer", @test1) - return true + return function test1 request_model 400 diff --git a/tests/cleo_tests/MemoryOperations/0A98.txt b/tests/cleo_tests/MemoryOperations/0A98.txt index 3b6bf28f..60c2c264 100644 --- a/tests/cleo_tests/MemoryOperations/0A98.txt +++ b/tests/cleo_tests/MemoryOperations/0A98.txt @@ -8,7 +8,7 @@ terminate_this_custom_script function tests it("should return a valid pointer", @test1) - return true + return function test1 request_model 333 // golf club diff --git a/tests/cleo_tests/MemoryOperations/0AA4.txt b/tests/cleo_tests/MemoryOperations/0AA4.txt index 2192b551..ac496cd2 100644 --- a/tests/cleo_tests/MemoryOperations/0AA4.txt +++ b/tests/cleo_tests/MemoryOperations/0AA4.txt @@ -8,7 +8,7 @@ terminate_this_custom_script function tests it("should return address of Sleep function from kernel32.dll", @test1) - return true + return function test1 int load_library_addr = read_memory 0x858070 4 false diff --git a/tests/cleo_tests/MemoryOperations/0AC6.txt b/tests/cleo_tests/MemoryOperations/0AC6.txt index 3ae125fe..8df82524 100644 --- a/tests/cleo_tests/MemoryOperations/0AC6.txt +++ b/tests/cleo_tests/MemoryOperations/0AC6.txt @@ -8,7 +8,7 @@ terminate_this_custom_script function tests it("should return valid pointer", @test1) - return true + return function test1 int ptr = 0 diff --git a/tests/cleo_tests/MemoryOperations/0AC7.txt b/tests/cleo_tests/MemoryOperations/0AC7.txt index f202c8d6..1974662f 100644 --- a/tests/cleo_tests/MemoryOperations/0AC7.txt +++ b/tests/cleo_tests/MemoryOperations/0AC7.txt @@ -8,7 +8,7 @@ terminate_this_custom_script function tests it("should return valid pointer", @test1) - return true + 
return function test1 int val1 = 0x11223344 diff --git a/tests/cleo_tests/MemoryOperations/0AC8.txt b/tests/cleo_tests/MemoryOperations/0AC8.txt index 03d92f0a..057244f4 100644 --- a/tests/cleo_tests/MemoryOperations/0AC8.txt +++ b/tests/cleo_tests/MemoryOperations/0AC8.txt @@ -12,8 +12,7 @@ function tests it("should return valid pointer", @test1) it("should point to zero-filled mem in CLEO5", @test2) - - return true + return function test1 // 0@ is set in before_each callback @@ -30,11 +29,11 @@ function tests :allocate 0@ = 0x11223344 0@ = allocate_memory {size} 4 - 0051: return + return :free free_memory {address} 0@ - 0051: return + return end \ No newline at end of file diff --git a/tests/cleo_tests/MemoryOperations/0AC9.txt b/tests/cleo_tests/MemoryOperations/0AC9.txt index 120eb3f1..11c1eb33 100644 --- a/tests/cleo_tests/MemoryOperations/0AC9.txt +++ b/tests/cleo_tests/MemoryOperations/0AC9.txt @@ -8,7 +8,7 @@ terminate_this_custom_script function tests it("should free allocated memory", @test1) - return true + return function test1 int ptr = 0x11223344 diff --git a/tests/cleo_tests/MemoryOperations/0AE9.txt b/tests/cleo_tests/MemoryOperations/0AE9.txt index 6aa9ece7..6e76df78 100644 --- a/tests/cleo_tests/MemoryOperations/0AE9.txt +++ b/tests/cleo_tests/MemoryOperations/0AE9.txt @@ -8,7 +8,7 @@ terminate_this_custom_script function tests it("should pop float from stack", @test1) - return true + return function test1 diff --git a/tests/cleo_tests/MemoryOperations/0AEA.txt b/tests/cleo_tests/MemoryOperations/0AEA.txt index fc1b5de2..555bb926 100644 --- a/tests/cleo_tests/MemoryOperations/0AEA.txt +++ b/tests/cleo_tests/MemoryOperations/0AEA.txt @@ -7,7 +7,7 @@ terminate_this_custom_script function tests it("should return ped handle for pointer", @test1) - return true + return function test1 int handle = get_player_char 0 diff --git a/tests/cleo_tests/MemoryOperations/0AEB.txt b/tests/cleo_tests/MemoryOperations/0AEB.txt index 9647cada..de66d9bd 100644 --- 
a/tests/cleo_tests/MemoryOperations/0AEB.txt +++ b/tests/cleo_tests/MemoryOperations/0AEB.txt @@ -7,7 +7,7 @@ terminate_this_custom_script function tests it("should return vehicle handle for pointer", @test1) - return true + return function test1 request_model 400 diff --git a/tests/cleo_tests/MemoryOperations/0AEC.txt b/tests/cleo_tests/MemoryOperations/0AEC.txt index 57333051..e97d7521 100644 --- a/tests/cleo_tests/MemoryOperations/0AEC.txt +++ b/tests/cleo_tests/MemoryOperations/0AEC.txt @@ -7,7 +7,7 @@ terminate_this_custom_script function tests it("should return vehicle handle for pointer", @test1) - return true + return function test1 request_model 333 // golf club diff --git a/tests/cleo_tests/cleo_tester.txt b/tests/cleo_tests/cleo_tester.txt index 29be0a8e..27a8fd9b 100644 --- a/tests/cleo_tests/cleo_tester.txt +++ b/tests/cleo_tests/cleo_tester.txt @@ -58,7 +58,7 @@ function it(spec_name: integer, callback: int) index++ _cleo_tester_write_var(VAR_TEST_INDEX, index) - return true + return function run_spec // this function should use 0 local variables From 8447beb4c8f4fb29442ce74f376567a22b5d4cc0 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Tue, 12 Mar 2024 03:46:45 +0100 Subject: [PATCH 125/216] Reduced amount of log entries from FXT files parsing. (#102) --- source/CPluginSystem.h | 2 +- source/CTextManager.cpp | 20 +++++++++++++------- source/CTextManager.h | 2 +- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/source/CPluginSystem.h b/source/CPluginSystem.h index 58341806..e9fe7fa8 100644 --- a/source/CPluginSystem.h +++ b/source/CPluginSystem.h @@ -42,7 +42,7 @@ namespace CLEO { if (loaded.find(name) != loaded.end()) { - LOG_WARNING(0, "Plugin `%s` already loaded. Skipping '%s'", fullPath, name); + LOG_WARNING(0, "Plugin `%s` already loaded. 
Skipping '%s'", name, fullPath); return; } diff --git a/source/CTextManager.cpp b/source/CTextManager.cpp index e14cdc02..c43904d4 100644 --- a/source/CTextManager.cpp +++ b/source/CTextManager.cpp @@ -133,15 +133,12 @@ namespace CLEO std::string str = key; std::transform(str.begin(), str.end(), str.begin(), ::toupper); fxts[str.c_str()] = new FxtEntry(value, !dynamic); - - TRACE("Added FXT[%s]", str.c_str()); } return true; } bool CTextManager::RemoveFxt(const char *key) { - TRACE("Deleting FXT[%s]", key); return fxts.erase(key) != 0; } @@ -174,7 +171,7 @@ namespace CLEO CTextManager::~CTextManager() { - TRACE("Deleting fxts..."); + TRACE("Deleting FXTs..."); size_t count = 0; for (auto it = fxts.begin(); it != fxts.end();) { @@ -195,7 +192,8 @@ namespace CLEO try { std::ifstream stream(fullPath); - ParseFxtFile(stream); + auto result = ParseFxtFile(stream); + TRACE("Added %d new FXT entries from file %s", result, fullPath); } catch (std::exception& ex) { @@ -229,12 +227,13 @@ namespace CLEO { } - void CTextManager::ParseFxtFile(std::istream& stream) + size_t CTextManager::ParseFxtFile(std::istream& stream) { static char buf[0x100]; char *key_iterator, *value_iterator, *value_start, *key_start; stream.exceptions(std::ios::badbit); + size_t addedCount = 0; while (true) { if (stream.eof()) break; @@ -264,12 +263,19 @@ namespace CLEO break; value_iterator++; } + // register found fxt entry - AddFxt(key_start, value_start, false); + if (AddFxt(key_start, value_start, false)) + { + addedCount++; + } + break; } key_iterator++; } } + + return addedCount; } } diff --git a/source/CTextManager.h b/source/CTextManager.h index bd86cd90..1ec67c1a 100644 --- a/source/CTextManager.h +++ b/source/CTextManager.h @@ -36,7 +36,7 @@ namespace CLEO const char *LocateFxt(const char *key); // erase all fxts, added by scripts void ClearDynamicFxts(); - void ParseFxtFile(std::istream& stream); + size_t ParseFxtFile(std::istream& stream); virtual void Inject(CCodeInjector& inj); }; From 
c5f4858136ca6d45de64950e9fa3c3e4f4d236a4 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Tue, 12 Mar 2024 03:48:29 +0100 Subject: [PATCH 126/216] Let LoadLibrary API resolve library paths itself. (#101) --- cleo_plugins/MemoryOperations/MemoryOperations.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/cleo_plugins/MemoryOperations/MemoryOperations.cpp b/cleo_plugins/MemoryOperations/MemoryOperations.cpp index 8220dcf7..648cd134 100644 --- a/cleo_plugins/MemoryOperations/MemoryOperations.cpp +++ b/cleo_plugins/MemoryOperations/MemoryOperations.cpp @@ -2,6 +2,7 @@ #include "CLEO_Utils.h" #include "plugin.h" #include "CTheScripts.h" +#include #include using namespace CLEO; @@ -361,7 +362,17 @@ class MemoryOperations //0AA2=2, load_dynamic_library %1s% store_to %2d% // IF and SET static OpcodeResult __stdcall opcode_0AA2(CLEO::CRunningScript* thread) { - OPCODE_READ_PARAM_FILEPATH(path); + OPCODE_READ_PARAM_STRING(path); + + // get absolute path + // in case of just filename let LoadLibrary resolve it itself + char buff[MAX_PATH]; + if (std::filesystem::path(path).has_parent_path()) + { + strncpy(buff, path, sizeof(buff)); + CLEO_ResolvePath(thread, buff, sizeof(buff)); + path = buff; + } auto ptr = LoadLibrary(path); if (ptr != nullptr) From 339b5ac62ab42de3d4a3c41cd1f1fb3813a94c74 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Tue, 12 Mar 2024 05:38:13 +0100 Subject: [PATCH 127/216] Allow 0051 to return cleo functions in main.scm too. 
(#103) --- source/CCustomOpcodeSystem.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 43e4a7bf..c03d6059 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -856,7 +856,7 @@ namespace CLEO OpcodeResult __stdcall CCustomOpcodeSystem::opcode_0051(CRunningScript* thread) // GOSUB return { - if (thread->SP == 0 && thread->IsCustom() && !IsLegacyScript(thread)) // CLEO5 - allow use of GOSUB `return` to exit cleo calls too + if (thread->SP == 0 && !IsLegacyScript(thread)) // CLEO5 - allow use of GOSUB `return` to exit cleo calls too { return GetInstance().OpcodeSystem.CleoReturnGeneric(0x0051, thread, false); // try CLEO's function return } From f8cc37d20092caf8c76c822c5904ed6f55b39049 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Mon, 18 Mar 2024 16:54:02 +0100 Subject: [PATCH 128/216] Bugfix of cleo_call string variables support. (#105) --- source/CCustomOpcodeSystem.cpp | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index c03d6059..3236ebaa 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -1093,17 +1093,11 @@ namespace CLEO { *thread >> arg->fParam; } - else if (IsVarString(paramType)) - { - arg->pParam = GetScriptParamPointer(thread); - if (arg->pParam >= locals && arg->pParam < localsEnd) // correct scoped variable's pointer - { - arg->dwParam -= (DWORD)locals; - arg->dwParam += (DWORD)storedLocals; - } - } - else if (IsImmString(paramType)) // those texts exists in script code, but without terminator character. Copy is necessary + else if (IsImmString(paramType) || IsVarString(paramType)) { + // imm string texts exists in script code, but without terminator character. + // For strings stored in variables there is no guarantee these will end with terminator. 
+ // In both cases copy is necessary to create proper c-string char tmp[MAX_STR_LEN + 1]; auto str = ReadStringParam(thread, tmp, sizeof(tmp)); scmFunc->stringParams.emplace_back(str); From a40358821034c46ce842c3bf08b0fc7217211d6c Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Tue, 19 Mar 2024 13:52:29 +0100 Subject: [PATCH 129/216] Implemented audio stream types (sfx/music). (#106) --- CHANGELOG.md | 3 +++ cleo_plugins/Audio/Audio.cpp | 26 +++++++++++++++++++++ cleo_plugins/Audio/Audio.vcxproj | 6 +++++ cleo_plugins/Audio/Audio.vcxproj.filters | 6 +++++ cleo_plugins/Audio/CAudioStream.cpp | 29 +++++++++++++++++++++++- cleo_plugins/Audio/CAudioStream.h | 5 ++++ cleo_plugins/Audio/CSoundSystem.cpp | 14 ++++++++---- cleo_plugins/Audio/CSoundSystem.h | 12 ++++++++-- cleo_plugins/Audio/SA.Audio.ini | 7 ++++++ 9 files changed, 101 insertions(+), 7 deletions(-) create mode 100644 cleo_plugins/Audio/SA.Audio.ini diff --git a/CHANGELOG.md b/CHANGELOG.md index 09368e45..0a16ee1e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - CLEO's audio now obey game's volume settings - implemented Doppler effect for 3d audio streams (fast moving sound sources) - CLEO's audio now follows game speed changes + - sound device can be now manually selected in .ini file - new opcode **2500 ([is_audio_stream_playing](https://library.sannybuilder.com/#/sa/audio/2500))** - new opcode **2501 ([get_audio_stream_duration](https://library.sannybuilder.com/#/sa/audio/2501))** - new opcode **2502 ([get_audio_stream_speed](https://library.sannybuilder.com/#/sa/audio/2502))** @@ -15,6 +16,8 @@ - new opcode **2506 ([set_audio_stream_source_size](https://library.sannybuilder.com/#/sa/audio/2506))** - new opcode **2507 ([get_audio_stream_progress](https://library.sannybuilder.com/#/sa/audio/2507))** - new opcode **2508 ([set_audio_stream_progress](https://library.sannybuilder.com/#/sa/audio/2508))** + - new opcode **2509 
([get_audio_stream_type](https://library.sannybuilder.com/#/sa/audio/2509))** + - new opcode **250A ([set_audio_stream_type](https://library.sannybuilder.com/#/sa/audio/250A))** - new [DebugUtils](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/DebugUtils) plugin - new opcode **00C3 ([debug_on](https://library.sannybuilder.com/#/sa/debug/00C3))** - new opcode **00C4 ([debug_off](https://library.sannybuilder.com/#/sa/debug/00C4))** diff --git a/cleo_plugins/Audio/Audio.cpp b/cleo_plugins/Audio/Audio.cpp index fafc2df2..323f6679 100644 --- a/cleo_plugins/Audio/Audio.cpp +++ b/cleo_plugins/Audio/Audio.cpp @@ -61,6 +61,9 @@ class Audio CLEO_RegisterOpcode(0x2507, opcode_2507); // get_audio_stream_progress CLEO_RegisterOpcode(0x2508, opcode_2508); // set_audio_stream_progress + CLEO_RegisterOpcode(0x2509, opcode_2509); // get_audio_stream_type + CLEO_RegisterOpcode(0x250A, opcode_250A); // set_audio_stream_type + // register event callbacks CLEO_RegisterCallback(eCallbackId::GameBegin, OnGameBegin); CLEO_RegisterCallback(eCallbackId::GameProcess, OnGameProcess); @@ -385,6 +388,29 @@ class Audio return OR_CONTINUE; } + + //2509=2,get_audio_stream_type %1d% store_to %2d% + static OpcodeResult __stdcall opcode_2509(CScriptThread* thread) + { + auto stream = (CAudioStream*)OPCODE_READ_PARAM_UINT(); VALIDATE_STREAM(); + + auto type = eStreamType::None; + if (stream) type = stream->GetType(); + + OPCODE_WRITE_PARAM_INT(type); + return OR_CONTINUE; + } + + //250A=2,set_audio_stream_type %1d% + static OpcodeResult __stdcall opcode_250A(CScriptThread* thread) + { + auto stream = (CAudioStream*)OPCODE_READ_PARAM_UINT(); VALIDATE_STREAM(); + auto type = OPCODE_READ_PARAM_INT(); + + if (stream) stream->SetType((eStreamType)type); + + return OR_CONTINUE; + } } audioInstance; CSoundSystem Audio::soundSystem; diff --git a/cleo_plugins/Audio/Audio.vcxproj b/cleo_plugins/Audio/Audio.vcxproj index 39afc1b1..f1762915 100644 --- a/cleo_plugins/Audio/Audio.vcxproj +++ 
b/cleo_plugins/Audio/Audio.vcxproj @@ -86,6 +86,7 @@ taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" +xcopy /Y "$(ProjectDir)*.ini" "$(OutDir)" xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" @@ -109,6 +110,7 @@ xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" +xcopy /Y "$(ProjectDir)*.ini" "$(OutDir)" xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" @@ -139,6 +141,7 @@ xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" + @@ -152,6 +155,9 @@ xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" + + + diff --git a/cleo_plugins/Audio/Audio.vcxproj.filters b/cleo_plugins/Audio/Audio.vcxproj.filters index 6edb1ae1..9d593543 100644 --- a/cleo_plugins/Audio/Audio.vcxproj.filters +++ b/cleo_plugins/Audio/Audio.vcxproj.filters @@ -41,6 +41,9 @@ + + plugin_sdk + @@ -67,4 +70,7 @@ {06b76bd2-09a7-4369-b83b-0298428ebb4f} + + + \ No newline at end of file diff --git a/cleo_plugins/Audio/CAudioStream.cpp b/cleo_plugins/Audio/CAudioStream.cpp index d29a23bf..d41097e4 100644 --- a/cleo_plugins/Audio/CAudioStream.cpp +++ b/cleo_plugins/Audio/CAudioStream.cpp @@ -16,6 +16,7 @@ CAudioStream::CAudioStream(const char* filepath) return; } + SetType(CSoundSystem::defaultStreamType); BASS_ChannelGetAttribute(streamInternal, BASS_ATTRIB_FREQ, &rate); ok = true; } @@ -131,6 +132,25 @@ float CAudioStream::GetSpeed() const return (float)speed; } +void CLEO::CAudioStream::SetType(eStreamType value) +{ + switch(value) + { + case eStreamType::SoundEffect: + case eStreamType::Music: + type = value; + break; + + default: + type = None; + } +} + +eStreamType CLEO::CAudioStream::GetType() const +{ + return type; +} + void CAudioStream::UpdateVolume() { if (volume != volumeTarget) @@ -148,7 +168,14 @@ void CAudioStream::UpdateVolume() } } - BASS_ChannelSetAttribute(streamInternal, BASS_ATTRIB_VOL, (float)volume * CSoundSystem::masterVolume); + 
float masterVolume = 1.0f; + switch(type) + { + case SoundEffect: masterVolume = CSoundSystem::masterVolumeSfx; break; + case Music: masterVolume = CSoundSystem::masterVolumeMusic; break; + } + + BASS_ChannelSetAttribute(streamInternal, BASS_ATTRIB_VOL, (float)volume * masterVolume); } void CAudioStream::UpdateSpeed() diff --git a/cleo_plugins/Audio/CAudioStream.h b/cleo_plugins/Audio/CAudioStream.h index d1302888..8844018a 100644 --- a/cleo_plugins/Audio/CAudioStream.h +++ b/cleo_plugins/Audio/CAudioStream.h @@ -1,4 +1,5 @@ #pragma once +#include "CSoundSystem.h" #include "plugin.h" #include "bass.h" @@ -38,6 +39,9 @@ namespace CLEO void SetSpeed(float value, float transitionTime = 0.0f); float GetSpeed() const; + void SetType(eStreamType value); + eStreamType GetType() const; + void SetVolume(float value, float transitionTime = 0.0f); float GetVolume() const; @@ -51,6 +55,7 @@ namespace CLEO protected: HSTREAM streamInternal = 0; eStreamState state = Paused; + eStreamType type = None; bool ok = false; float rate = 44100.0f; // file's sampling rate double speed = 1.0f; diff --git a/cleo_plugins/Audio/CSoundSystem.cpp b/cleo_plugins/Audio/CSoundSystem.cpp index fee34ee8..326045ce 100644 --- a/cleo_plugins/Audio/CSoundSystem.cpp +++ b/cleo_plugins/Audio/CSoundSystem.cpp @@ -12,8 +12,10 @@ namespace CLEO BASS_3DVECTOR CSoundSystem::vel(0.0, 0.0, 0.0); BASS_3DVECTOR CSoundSystem::front(0.0, -1.0, 0.0); BASS_3DVECTOR CSoundSystem::top(0.0, 0.0, 1.0); + eStreamType CSoundSystem::defaultStreamType = eStreamType::SoundEffect; float CSoundSystem::masterSpeed = 1.0f; - float CSoundSystem::masterVolume = 1.0f; + float CSoundSystem::masterVolumeSfx = 1.0f; + float CSoundSystem::masterVolumeMusic = 1.0f; void EnumerateBassDevices(int& total, int& enabled, int& default_device) { @@ -45,12 +47,15 @@ namespace CLEO { if (initialized) return true; // already done + auto config = GetConfigFilename(); + defaultStreamType = (eStreamType)GetPrivateProfileInt("General", 
"DefaultStreamType", 0, config.c_str()); + int default_device, total_devices, enabled_devices; EnumerateBassDevices(total_devices, enabled_devices, default_device); + int forceDevice = GetPrivateProfileInt("General", "AudioDevice", -1, config.c_str()); BASS_DEVICEINFO info = { nullptr, nullptr, 0 }; - if (forceDevice != -1 && BASS_GetDeviceInfo(forceDevice, &info) && - info.flags & BASS_DEVICE_ENABLED) + if (forceDevice != -1 && BASS_GetDeviceInfo(forceDevice, &info) && (info.flags & BASS_DEVICE_ENABLED)) default_device = forceDevice; TRACE("On system found %d devices, %d enabled devices, assuming device to use: %d (%s)", @@ -160,7 +165,8 @@ namespace CLEO // get game globals masterSpeed = CTimer::ms_fTimeScale; - masterVolume = AEAudioHardware.m_fEffectMasterScalingFactor * 0.5f; // fit to game's sfx volume + masterVolumeSfx = AEAudioHardware.m_fEffectMasterScalingFactor * 0.5f; // fit to game's sfx volume + masterVolumeMusic = AEAudioHardware.m_fMusicMasterScalingFactor * 0.5f; // camera movements CMatrixLink * pMatrix = nullptr; diff --git a/cleo_plugins/Audio/CSoundSystem.h b/cleo_plugins/Audio/CSoundSystem.h index 86e57706..9322f2c0 100644 --- a/cleo_plugins/Audio/CSoundSystem.h +++ b/cleo_plugins/Audio/CSoundSystem.h @@ -7,6 +7,13 @@ namespace CLEO class CAudioStream; class C3DAudioStream; + enum eStreamType + { + None = 0, + SoundEffect, + Music, + }; + class CSoundSystem { friend class CAudioStream; @@ -15,7 +22,6 @@ namespace CLEO std::set streams; BASS_INFO SoundDevice = { 0 }; bool initialized = false; - int forceDevice = -1; bool paused = false; static bool useFloatAudio; @@ -24,8 +30,10 @@ namespace CLEO static BASS_3DVECTOR vel; static BASS_3DVECTOR front; static BASS_3DVECTOR top; + static eStreamType defaultStreamType; static float masterSpeed; // game simulation speed - static float masterVolume; + static float masterVolumeSfx; + static float masterVolumeMusic; public: CSoundSystem() = default; // TODO: give to user an ability to force a sound 
device to use (ini-file or cmd-line?) diff --git a/cleo_plugins/Audio/SA.Audio.ini b/cleo_plugins/Audio/SA.Audio.ini new file mode 100644 index 00000000..a9925975 --- /dev/null +++ b/cleo_plugins/Audio/SA.Audio.ini @@ -0,0 +1,7 @@ +[General] +; Manually select audio device. Visit `.cleo.log` file to check list of available options. -1 for automatic +AudioDevice=-1 + +; Which game's volume settings CLEO sounds should use by default: 0 - None, 1 - SFX, 2 - Music +DefaultStreamType=1 + From d41d80d8b7dcbc49b844ec4ec72d483b3ea0c978 Mon Sep 17 00:00:00 2001 From: Seemann Date: Tue, 19 Mar 2024 13:54:31 -0400 Subject: [PATCH 130/216] exit from a function with single return is always false condition (#107) --- source/CCustomOpcodeSystem.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 3236ebaa..e01dc548 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -858,6 +858,7 @@ namespace CLEO { if (thread->SP == 0 && !IsLegacyScript(thread)) // CLEO5 - allow use of GOSUB `return` to exit cleo calls too { + SetScriptCondResult(thread, false); return GetInstance().OpcodeSystem.CleoReturnGeneric(0x0051, thread, false); // try CLEO's function return } From 130f720b750b8af9ee6f04482f5d3c66eee96863 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Thu, 21 Mar 2024 17:05:04 +0100 Subject: [PATCH 131/216] New Text plugin. 
(#104) New Text plugin --- CHANGELOG.md | 10 + CLEO5.vcxproj | 10 +- cleo_plugins/CLEO_Plugins.sln | 6 + .../FileSystemOperations.cpp | 9 +- .../Text}/CTextManager.cpp | 149 +----- {source => cleo_plugins/Text}/CTextManager.h | 20 +- cleo_plugins/Text/Text.cpp | 424 ++++++++++++++++++ cleo_plugins/Text/Text.vcxproj | 139 ++++++ cleo_plugins/Text/Text.vcxproj.filters | 53 +++ cleo_plugins/Text/crc32.cpp | 91 ++++ cleo_plugins/Text/crc32.h | 8 + cleo_sdk/CLEO.h | 6 +- cleo_sdk/CLEO_Utils.h | 146 ++++-- source/CCustomOpcodeSystem.cpp | 417 ++++++----------- source/CCustomOpcodeSystem.h | 7 - source/CGameVersionManager.cpp | 18 - source/CGameVersionManager.h | 19 - source/CleoBase.cpp | 4 - source/CleoBase.h | 2 - source/FileEnumerator.h | 71 +-- source/cleo.def | 32 +- .../cleo_tests/FilesystemOperations/0A9A.txt | 2 +- tests/cleo_tests/MemoryOperations/0A8C.txt | 2 +- tests/cleo_tests/MemoryOperations/0A8D.txt | 2 +- tests/cleo_tests/MemoryOperations/0A96.txt | 2 +- tests/cleo_tests/MemoryOperations/0A97.txt | 2 +- tests/cleo_tests/MemoryOperations/0A98.txt | 2 +- tests/cleo_tests/MemoryOperations/0AA4.txt | 2 +- tests/cleo_tests/MemoryOperations/0AC6.txt | 2 +- tests/cleo_tests/MemoryOperations/0AC7.txt | 2 +- tests/cleo_tests/MemoryOperations/0AC8.txt | 2 +- tests/cleo_tests/MemoryOperations/0AC9.txt | 2 +- tests/cleo_tests/MemoryOperations/0AE9.txt | 2 +- tests/cleo_tests/MemoryOperations/0AEA.txt | 2 +- tests/cleo_tests/MemoryOperations/0AEB.txt | 2 +- tests/cleo_tests/MemoryOperations/0AEC.txt | 2 +- tests/cleo_tests/Text/0AD3.txt | 72 +++ tests/cleo_tests/Text/0AD4.txt | 75 ++++ tests/cleo_tests/Text/0ADB.txt | 18 + tests/cleo_tests/Text/0ADE.txt | 32 ++ tests/cleo_tests/Text/0ADF.txt | 23 + tests/cleo_tests/Text/0AE0.txt | 25 ++ tests/cleo_tests/Text/0AED.txt | 17 + tests/cleo_tests/Text/2600.txt | 54 +++ tests/cleo_tests/Text/2601.txt | 92 ++++ tests/cleo_tests/Text/2602.txt | 79 ++++ tests/cleo_tests/Text/2603.txt | 79 ++++ tests/cleo_tests/Text/2604.txt | 
73 +++ .../{cleo_tester.txt => cleo_tester.inc} | 164 +++++-- 49 files changed, 1840 insertions(+), 634 deletions(-) rename {source => cleo_plugins/Text}/CTextManager.cpp (50%) rename {source => cleo_plugins/Text}/CTextManager.h (57%) create mode 100644 cleo_plugins/Text/Text.cpp create mode 100644 cleo_plugins/Text/Text.vcxproj create mode 100644 cleo_plugins/Text/Text.vcxproj.filters create mode 100644 cleo_plugins/Text/crc32.cpp create mode 100644 cleo_plugins/Text/crc32.h create mode 100644 tests/cleo_tests/Text/0AD3.txt create mode 100644 tests/cleo_tests/Text/0AD4.txt create mode 100644 tests/cleo_tests/Text/0ADB.txt create mode 100644 tests/cleo_tests/Text/0ADE.txt create mode 100644 tests/cleo_tests/Text/0ADF.txt create mode 100644 tests/cleo_tests/Text/0AE0.txt create mode 100644 tests/cleo_tests/Text/0AED.txt create mode 100644 tests/cleo_tests/Text/2600.txt create mode 100644 tests/cleo_tests/Text/2601.txt create mode 100644 tests/cleo_tests/Text/2602.txt create mode 100644 tests/cleo_tests/Text/2603.txt create mode 100644 tests/cleo_tests/Text/2604.txt rename tests/cleo_tests/{cleo_tester.txt => cleo_tester.inc} (54%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a16ee1e..386e4714 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,7 +48,15 @@ - new opcode **2405 ([is_script_running](https://library.sannybuilder.com/#/sa/memory/2405))** - new opcode **2406 ([get_script_struct_from_filename](https://library.sannybuilder.com/#/sa/memory/2406))** - new opcode **2407 ([is_memory_equal](https://library.sannybuilder.com/#/sa/memory/2407))** +- new [Text](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/Text) plugin + - text related opcodes moved from CLEO core into separated plugin + - new opcode **2600 ([is_text_empty](https://library.sannybuilder.com/#/sa/text/2600))** + - new opcode **2601 ([is_text_equal](https://library.sannybuilder.com/#/sa/text/2601))** + - new opcode **2602 
([is_text_in_text](https://library.sannybuilder.com/#/sa/text/2602))** + - new opcode **2603 ([is_text_prefix](https://library.sannybuilder.com/#/sa/text/2603))** + - new opcode **2604 ([is_text_suffix](https://library.sannybuilder.com/#/sa/text/2604))** - new and updated opcodes + - implemented support for **memory pointer string** arguments for all game's native opcodes - **0B1E ([sign_extend](https://library.sannybuilder.com/#/sa/bitwise/0B1E))** - **0DD5 ([get_game_platform](https://library.sannybuilder.com/#/sa/CLEO/0DD5))** - **2002 ([cleo_return_with](https://library.sannybuilder.com/#/sa/CLEO/2002))** @@ -99,6 +107,8 @@ - new SDK method: CLEO_GetScriptWorkDir - new SDK method: CLEO_SetScriptWorkDir - new SDK method: CLEO_ResolvePath +- new SDK method: CLEO_ListDirectory +- new SDK method: CLEO_ListDirectoryFree - new SDK method: CLEO_GetScriptByName - new SDK method: CLEO_GetScriptByFilename - new SDK method: CLEO_GetScriptDebugMode diff --git a/CLEO5.vcxproj b/CLEO5.vcxproj index bfb272e5..5dc3855e 100644 --- a/CLEO5.vcxproj +++ b/CLEO5.vcxproj @@ -48,7 +48,6 @@ - @@ -60,6 +59,14 @@ NotUsing + + NotUsing + NotUsing + + + NotUsing + NotUsing + @@ -75,7 +82,6 @@ - diff --git a/cleo_plugins/CLEO_Plugins.sln b/cleo_plugins/CLEO_Plugins.sln index 62d55f52..ed6aac0f 100644 --- a/cleo_plugins/CLEO_Plugins.sln +++ b/cleo_plugins/CLEO_Plugins.sln @@ -15,6 +15,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MemoryOperations", "MemoryO EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Audio", "Audio\Audio.vcxproj", "{897344A5-1AF1-493A-8B0B-196C0423D5DA}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Text", "Text\Text.vcxproj", "{BD19AEFD-626B-40AE-8D83-6D444D2EFBF8}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|x86 = Debug|x86 @@ -45,6 +47,10 @@ Global {897344A5-1AF1-493A-8B0B-196C0423D5DA}.Debug|x86.Build.0 = Debug|Win32 {897344A5-1AF1-493A-8B0B-196C0423D5DA}.Release|x86.ActiveCfg = 
Release|Win32 {897344A5-1AF1-493A-8B0B-196C0423D5DA}.Release|x86.Build.0 = Release|Win32 + {BD19AEFD-626B-40AE-8D83-6D444D2EFBF8}.Debug|x86.ActiveCfg = Debug|Win32 + {BD19AEFD-626B-40AE-8D83-6D444D2EFBF8}.Debug|x86.Build.0 = Debug|Win32 + {BD19AEFD-626B-40AE-8D83-6D444D2EFBF8}.Release|x86.ActiveCfg = Release|Win32 + {BD19AEFD-626B-40AE-8D83-6D444D2EFBF8}.Release|x86.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp index bd02cc15..dd3857ab 100644 --- a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp @@ -290,12 +290,7 @@ class FileSystemOperations static OpcodeResult WINAPI opcode_0AD7(CRunningScript* thread) { auto handle = READ_FILE_HANDLE_PARAM(); - - char* buffer = nullptr; - int bufferSize = 0; - DWORD needsTerminator = TRUE; - CLEO_ReadStringParamWriteBuffer(thread, &buffer, &bufferSize, &needsTerminator); - + auto result = OPCODE_READ_PARAM_OUTPUT_VAR_STRING(); auto size = OPCODE_READ_PARAM_INT(); if (size < 0) @@ -311,7 +306,7 @@ class FileSystemOperations } // use caller's size argument, ignoring actual target type size. Intended for legacy reasons. 
- bool ok = File::readString(handle, buffer, size) != nullptr; + bool ok = File::readString(handle, result.data, size) != nullptr; OPCODE_CONDITION_RESULT(ok); return OR_CONTINUE; diff --git a/source/CTextManager.cpp b/cleo_plugins/Text/CTextManager.cpp similarity index 50% rename from source/CTextManager.cpp rename to cleo_plugins/Text/CTextManager.cpp index c43904d4..576932ed 100644 --- a/source/CTextManager.cpp +++ b/cleo_plugins/Text/CTextManager.cpp @@ -1,113 +1,26 @@ -#include "stdafx.h" - #include "CTextManager.h" -#include "CleoBase.h" -#include "FileEnumerator.h" +#include "..\cleo_sdk\CLEO_Utils.h" +#include "CFileMgr.h" +#include "CText.h" +#include +#include +#include #include +#include #include -#include +#include + +namespace FS = std::filesystem; namespace CLEO { - CText *gameTexts; - char *cheatString; - BYTE *mpackNumber; - - void(__cdecl * _PrintHelp)(const char *, bool sound, bool permanent, bool brief); - void(__cdecl * _PrintBig)(const char *, unsigned time, unsigned style); - void(__cdecl * _Print) (const char *, unsigned time, bool flag1, bool flag2); - void(__cdecl * _PrintNow) (const char *, unsigned time, bool flag1, bool flag2); - const char* (__fastcall * CText__Get)(CText*, int dummy, const char*); - DWORD _CText__TKey__locate; - - char message_buf_big[7][MAX_STR_LEN]; - char message_buf_low[MAX_STR_LEN]; - char message_buf_high[MAX_STR_LEN]; - - const char * __fastcall CText__TKey__locate(CText__TKey *key, int dummy, const char *gxt, bool& found) - { - const char * result; - _asm - { - mov ecx, key - push found - push gxt - call _CText__TKey__locate - mov result, eax - } - return result; - } - - void PrintHelp(const char *text, bool bPermanent, bool bBeep, bool bAddBrief) - { - _PrintHelp(text, bBeep, bPermanent, bAddBrief); - } - - void ClearHelp() - { - _PrintHelp(nullptr, false, false, false); - } - - void PrintBig(const char *text, unsigned time, unsigned style) - { - strcpy(message_buf_big[style - 1], text); - 
_PrintBig(message_buf_big[style - 1], time, style - 1); - } - - void Print(const char *text, unsigned time) - { - strcpy(message_buf_low, text); - _Print(message_buf_low, time, false, false); - } - - void PrintNow(const char *text, unsigned time) - { - strcpy(message_buf_high, text); - _PrintNow(message_buf_high, time, false, false); - } - - bool TestCheat(const char* cheat) - { - char *c = cheatString; - char buf[30]; - strcpy(buf, cheat); - char *s = _strrev(buf); - if (_strnicmp(s, c, strlen(s))) return false; - cheatString[0] = 0; - return true; - } - - const char * __fastcall CText__locate(CText *text, int dummy, const char *gxt) - { - bool bFound; - const char *szResult; - - if ((*gxt == '\0') || (*gxt == ' ')) return ""; - - szResult = GetInstance().TextManager.LocateFxt(gxt); - if (szResult) return szResult; - - szResult = CText__TKey__locate(&text->tkeyMain, 0, gxt, bFound); - - if (!bFound) - { - if (text->missionTableLoaded || *mpackNumber || text->haveTabl) - { - szResult = CText__TKey__locate(&text->tkeyMission, 0, gxt, bFound); - if (!bFound) return ""; - //else TRACE("Failed to find used text label '%s'", gxt); - } - } - return szResult; - } - CTextManager::CTextManager() : fxts(1, crc32FromUpcaseStdString) { } const char* CTextManager::Get(const char* key) { - return CText__Get(gameTexts, 0, key); + return TheText.Get(key); } bool CTextManager::AddFxt(const char *key, const char *value, bool dynamic) @@ -154,19 +67,15 @@ namespace CLEO void CTextManager::ClearDynamicFxts() { TRACE("Deleting dynamic fxts..."); - // size_t count = 0, total = fxts.size(); for (auto it = fxts.begin(); it != fxts.end();) { if (!it->second->is_static) { delete it->second; fxts.erase(it++); - // ++count; } else ++it; } - // TRACE("Deleting finished, %d elements erased, %d elements left", - // count, total - count); } CTextManager::~CTextManager() @@ -179,27 +88,29 @@ namespace CLEO fxts.erase(it++); ++count; } - // TRACE("Deleting finished, %d elements erased", count); } 
void CTextManager::LoadFxts() { + // create FXT directory if not present yet + FS::create_directory(FS::path(CFileMgr::ms_rootDirName).append("cleo\\cleo_text")); + // load whole FXT files directory - auto path = FS::path(Filepath_Cleo).append("cleo_text").string(); - FilesWalk(path.c_str(), ".fxt", [this](const char* fullPath, const char* filename) + auto list = CLEO::CLEO_ListDirectory(nullptr, "cleo\\cleo_text\\*.fxt", false, true); + for (DWORD i = 0; i < list.count; i++) { - TRACE("Parsing FXT file %s", fullPath); try { - std::ifstream stream(fullPath); + std::ifstream stream(list.paths[i]); auto result = ParseFxtFile(stream); - TRACE("Added %d new FXT entries from file %s", result, fullPath); + TRACE("Added %d new FXT entries from file %s", result, list.paths[i]); } catch (std::exception& ex) { - LOG_WARNING(0, "Loading of FXT file '%s' failed: \n%s", fullPath, ex.what()); + LOG_WARNING(0, "Loading of FXT file '%s' failed: \n%s", list.paths[i], ex.what()); } - }); + } + CLEO::CLEO_ListDirectoryFree(list); } void CTextManager::Clear() @@ -207,22 +118,6 @@ namespace CLEO fxts.clear(); } - void CTextManager::Inject(CCodeInjector& inj) - { - TRACE("Injecting TextManager..."); - CGameVersionManager& gvm = GetInstance().VersionManager; - _PrintHelp = gvm.TranslateMemoryAddress(MA_TEXT_BOX_FUNCTION); - _PrintBig = gvm.TranslateMemoryAddress(MA_STYLED_TEXT_FUNCTION); - _Print = gvm.TranslateMemoryAddress(MA_TEXT_LOW_PRIORITY_FUNCTION); - _PrintNow = gvm.TranslateMemoryAddress(MA_TEXT_HIGH_PRIORITY_FUNCTION); - _CText__TKey__locate = gvm.TranslateMemoryAddress(MA_CTEXT_TKEY_LOCATE_FUNCTION); - gameTexts = gvm.TranslateMemoryAddress(MA_GAME_TEXTS); - cheatString = gvm.TranslateMemoryAddress(MA_CHEAT_STRING); - mpackNumber = gvm.TranslateMemoryAddress(MA_MPACK_NUMBER); - CText__Get = gvm.TranslateMemoryAddress(MA_CALL_CTEXT_LOCATE); - inj.InjectFunction(CText__locate, CText__Get); - } - CTextManager::FxtEntry::FxtEntry(const char *_text, bool _static) : text(_text), 
is_static(_static) { } @@ -237,8 +132,10 @@ namespace CLEO while (true) { if (stream.eof()) break; + stream.getline(buf, sizeof(buf)); if (stream.fail()) break; + // parse extracted line key_start = key_iterator = buf; while (*key_iterator) diff --git a/source/CTextManager.h b/cleo_plugins/Text/CTextManager.h similarity index 57% rename from source/CTextManager.h rename to cleo_plugins/Text/CTextManager.h index 1ec67c1a..5aa76e91 100644 --- a/source/CTextManager.h +++ b/cleo_plugins/Text/CTextManager.h @@ -1,13 +1,12 @@ #pragma once -#include "stdafx.h" -#include "CCodeInjector.h" +#include "CText.h" #include "crc32.h" #include #include namespace CLEO { - class CTextManager : VInjectible + class CTextManager { class FxtEntry { @@ -17,11 +16,11 @@ namespace CLEO FxtEntry(const char *_text, bool _static = false); }; - typedef std::unordered_map fxt_map_type; + typedef std::unordered_map fxt_map_type; typedef fxt_map_type::iterator fxt_iterator; typedef fxt_map_type::const_iterator const_fxt_iterator; fxt_map_type fxts; + public: CTextManager(); ~CTextManager(); @@ -37,16 +36,5 @@ namespace CLEO // erase all fxts, added by scripts void ClearDynamicFxts(); size_t ParseFxtFile(std::istream& stream); - virtual void Inject(CCodeInjector& inj); }; - - void PrintHelp(const char *text, bool bPermanent = false, bool bBeep = true, bool bAddBrief = false); - void ClearHelp(); - void PrintBig(const char *text, unsigned time, unsigned style); - void Print(const char *text, unsigned time); - void PrintNow(const char *text, unsigned time); - - bool TestCheat(const char* cheat); - extern CText * gameTexts; - const char * __fastcall CText__locate(CText *text, int dummy, const char *gxt); } diff --git a/cleo_plugins/Text/Text.cpp b/cleo_plugins/Text/Text.cpp new file mode 100644 index 00000000..ea8ace2f --- /dev/null +++ b/cleo_plugins/Text/Text.cpp @@ -0,0 +1,424 @@ +#include "plugin.h" +#include "CLEO.h" +#include "CLEO_Utils.h" +#include "CHud.h" +#include "CGame.h" +#include 
"CMessages.h" +#include "CModelInfo.h" +#include "CText.h" +#include "CTextManager.h" +#include + +using namespace CLEO; +using namespace plugin; + +class Text +{ +public: + static CTextManager textManager; + + static char msgBuffLow[MAX_STR_LEN + 1]; + static char msgBuffHigh[MAX_STR_LEN + 1]; + static const size_t MsgBigStyleCount = 7; + static char msgBuffBig[MsgBigStyleCount][MAX_STR_LEN + 1]; + + Text() + { + auto cleoVer = CLEO_GetVersion(); + if (cleoVer < CLEO_VERSION) + { + auto err = StringPrintf("This plugin requires version %X or later! \nCurrent version of CLEO is %X.", CLEO_VERSION >> 8, cleoVer >> 8); + MessageBox(HWND_DESKTOP, err.c_str(), TARGET_NAME, MB_SYSTEMMODAL | MB_ICONERROR); + return; + } + + //register opcodes + CLEO_RegisterOpcode(0x0ACA, opcode_0ACA); // print_help_string + CLEO_RegisterOpcode(0x0ACB, opcode_0ACB); // print_big_string + CLEO_RegisterOpcode(0x0ACC, opcode_0ACC); // print_string + CLEO_RegisterOpcode(0x0ACD, opcode_0ACD); // print_string_now + CLEO_RegisterOpcode(0x0ACE, opcode_0ACE); // print_help_formatted + CLEO_RegisterOpcode(0x0ACF, opcode_0ACF); // print_big_formatted + CLEO_RegisterOpcode(0x0AD0, opcode_0AD0); // print_formatted + CLEO_RegisterOpcode(0x0AD1, opcode_0AD1); // print_formatted_now + + CLEO_RegisterOpcode(0x0AD3, opcode_0AD3); // string_format + CLEO_RegisterOpcode(0x0AD4, opcode_0AD4); // scan_string + CLEO_RegisterOpcode(0x0ADB, opcode_0ADB); // get_name_of_vehicle_model + + CLEO_RegisterOpcode(0x0ADE, opcode_0ADE); // get_text_label_string + CLEO_RegisterOpcode(0x0ADF, opcode_0ADF); // add_text_label + CLEO_RegisterOpcode(0x0AE0, opcode_0AE0); // remove_text_label + + CLEO_RegisterOpcode(0x0AED, opcode_0AED); // string_float_format + + CLEO_RegisterOpcode(0x2600, opcode_2600); // is_text_empty + CLEO_RegisterOpcode(0x2601, opcode_2601); // is_text_equal + CLEO_RegisterOpcode(0x2602, opcode_2602); // is_text_in_text + CLEO_RegisterOpcode(0x2603, opcode_2603); // is_text_prefix + 
CLEO_RegisterOpcode(0x2604, opcode_2604); // is_text_sufix + + // register event callbacks + CLEO_RegisterCallback(eCallbackId::GameBegin, OnGameBegin); + CLEO_RegisterCallback(eCallbackId::GameEnd, OnGameEnd); + + // install hooks + MemPatchJump(0x006A0050, &HOOK_CTextGet); // FUNC_CText__Get from CText.cpp + } + + static void __stdcall OnGameBegin(DWORD saveSlot) + { + textManager.LoadFxts(); + } + + static void __stdcall OnGameEnd() + { + textManager.Clear(); + } + + // hook of game's CText::Get + static const char* __fastcall HOOK_CTextGet(CText* text, int dummy, const char* gxt) + { + if ((gxt[0] == '\0') || (gxt[0] == ' ')) return ""; + + auto result = Text::textManager.LocateFxt(gxt); + if (result != nullptr) return result; + + bool found; + result = text->tkeyMain.GetTextByLabel(gxt, &found); + if (found) return result; + + if (text->missionTableLoaded || CGame::bMissionPackGame || text->haveTabl) + { + result = text->tkeyMission.GetTextByLabel(gxt, &found); + if (found) return result; + } + + return ""; + } + + //0ACA=1,show_text_box %1d% + static OpcodeResult __stdcall opcode_0ACA(CRunningScript* thread) + { + OPCODE_READ_PARAM_STRING(text); + + CHud::SetHelpMessage(text, true, false, false); + return OR_CONTINUE; + } + + //0ACB=3,show_styled_text %1d% time %2d% style %3d% + static OpcodeResult __stdcall opcode_0ACB(CRunningScript* thread) + { + OPCODE_READ_PARAM_STRING(text); + auto time = OPCODE_READ_PARAM_INT(); + auto style = OPCODE_READ_PARAM_INT(); + + auto styleIdx = std::clamp(style, 0, (int)MsgBigStyleCount - 1); + strncpy(msgBuffBig[styleIdx], text, sizeof(msgBuffBig[styleIdx])); + CMessages::AddBigMessage(msgBuffBig[styleIdx], time, style); + return OR_CONTINUE; + } + + //0ACC=2,show_text_lowpriority %1d% time %2d% + static OpcodeResult __stdcall opcode_0ACC(CRunningScript* thread) + { + OPCODE_READ_PARAM_STRING(text); + auto time = OPCODE_READ_PARAM_INT(); + + strncpy(msgBuffLow, text, sizeof(msgBuffLow)); + CMessages::AddMessage(msgBuffLow, 
time, false, false); + return OR_CONTINUE; + } + + //0ACD=2,show_text_highpriority %1d% time %2d% + static OpcodeResult __stdcall opcode_0ACD(CRunningScript* thread) + { + OPCODE_READ_PARAM_STRING(text); + auto time = OPCODE_READ_PARAM_INT(); + + strncpy(msgBuffHigh, text, sizeof(msgBuffHigh)); + CMessages::AddMessageJumpQ(msgBuffHigh, time, false, false); + return OR_CONTINUE; + } + + //0ACE=-1,show_formatted_text_box %1d% + static OpcodeResult __stdcall opcode_0ACE(CRunningScript* thread) + { + OPCODE_READ_PARAM_STRING_FORMATTED(text); + + CHud::SetHelpMessage(text, true, false, false); + return OR_CONTINUE; + } + + //0ACF=-1,show_formatted_styled_text %1d% time %2d% style %3d% + static OpcodeResult __stdcall opcode_0ACF(CRunningScript* thread) + { + OPCODE_READ_PARAM_STRING(format); + auto time = OPCODE_READ_PARAM_INT(); + auto style = OPCODE_READ_PARAM_INT(); + OPCODE_READ_PARAMS_FORMATTED(format, text); + + auto styleIdx = std::clamp(style, 0, (int)MsgBigStyleCount - 1); + strncpy(msgBuffBig[styleIdx], text, sizeof(msgBuffBig[styleIdx])); + CMessages::AddBigMessage(msgBuffBig[styleIdx], time, style); + return OR_CONTINUE; + } + + //0AD0=-1,show_formatted_text_lowpriority %1d% time %2d% + static OpcodeResult __stdcall opcode_0AD0(CRunningScript* thread) + { + OPCODE_READ_PARAM_STRING(format); + auto time = OPCODE_READ_PARAM_INT(); + OPCODE_READ_PARAMS_FORMATTED(format, text); + + strncpy(msgBuffLow, text, sizeof(msgBuffLow)); + CMessages::AddMessage(msgBuffLow, time, false, false); + return OR_CONTINUE; + } + + //0AD1=-1,show_formatted_text_highpriority %1d% time %2d% + static OpcodeResult __stdcall opcode_0AD1(CRunningScript* thread) + { + OPCODE_READ_PARAM_STRING(format); + auto time = OPCODE_READ_PARAM_INT(); + OPCODE_READ_PARAMS_FORMATTED(format, text); + + strncpy(msgBuffHigh, text, sizeof(msgBuffHigh)); + CMessages::AddMessageJumpQ(msgBuffHigh, time, false, false); + return OR_CONTINUE; + } + + //0AD3=-1,string %1d% format %2d% ... 
+ static OpcodeResult __stdcall opcode_0AD3(CRunningScript* thread) + { + auto result = OPCODE_READ_PARAM_OUTPUT_VAR_STRING(); + OPCODE_READ_PARAM_STRING_FORMATTED(text); + + OPCODE_WRITE_PARAM_VAR_STRING(result, text); + return OR_CONTINUE; + } + + //0AD4=-1,%3d% = scan_string %1d% format %2d% //IF and SET + static OpcodeResult __stdcall opcode_0AD4(CRunningScript* thread) + { + OPCODE_READ_PARAM_STRING(src); + OPCODE_READ_PARAM_STRING(format); + + auto readCount = OPCODE_READ_PARAM_OUTPUT_VAR_INT(); // store_to + + // collect provided by caller store_to variables + size_t outputParamCount = 0; + SCRIPT_VAR* outputParams[35]; + struct StringParamDesc + { + bool used = false; + StringParamBufferInfo target; + std::string str; + } stringParams[35]; + + for (int i = 0; i < 35; i++) + { + auto paramType = thread->PeekDataType(); + + if (paramType == DT_END) + { + outputParams[i] = nullptr; + continue; + } + + if (IsVarString(paramType)) + { + if (IsLegacyScript(thread)) + { + // older CLEOs did not carred about string variable size limitations + // just give pointer to the variable's data and allow overflow depending on input data + outputParams[i] = CLEO_GetPointerToScriptVariable(thread); + } + else + { + stringParams[i].used = true; + stringParams[i].target = OPCODE_READ_PARAM_OUTPUT_VAR_STRING(); + + stringParams[i].str.resize(MAX_STR_LEN); // temp storage + outputParams[i] = (SCRIPT_VAR*)stringParams[i].str.data(); + } + } + else + { + outputParams[i] = OPCODE_READ_PARAM_OUTPUT_VAR_ANY32(); + } + + outputParamCount++; + } + CLEO_SkipUnusedVarArgs(thread); // and var args terminator + + *readCount = sscanf(src, format, + outputParams[0], outputParams[1], outputParams[2], outputParams[3], outputParams[4], outputParams[5], + outputParams[6], outputParams[7], outputParams[8], outputParams[9], outputParams[10], outputParams[11], + outputParams[12], outputParams[13], outputParams[14], outputParams[15], outputParams[16], outputParams[17], + outputParams[18], 
outputParams[19], outputParams[20], outputParams[21], outputParams[22], outputParams[23], + outputParams[24], outputParams[25], outputParams[26], outputParams[27], outputParams[28], outputParams[29], + outputParams[30], outputParams[31], outputParams[32], outputParams[33], outputParams[34]); + + // transfer string params to target variables + for (auto& p : stringParams) + { + if (p.used) OPCODE_WRITE_PARAM_VAR_STRING(p.target, p.str.c_str()); + } + + OPCODE_CONDITION_RESULT(outputParamCount == *readCount); + return OR_CONTINUE; + } + + //0ADB=2,%2d% = car_model %1d% name + static OpcodeResult __stdcall opcode_0ADB(CRunningScript* thread) + { + auto modelIndex = OPCODE_READ_PARAM_UINT(); + + CVehicleModelInfo* model; + // if 1.0 US, prefer GetModelInfo function — makes it compatible with fastman92's limit adjuster + if (CLEO_GetGameVersion() == CLEO::GV_US10) + model = plugin::CallAndReturn(modelIndex); + else + model = reinterpret_cast(CModelInfo::ms_modelInfoPtrs[modelIndex]); + + auto str = std::string(std::string_view(model->m_szGameName, sizeof(model->m_szGameName))); // to proper cstr + + OPCODE_WRITE_PARAM_STRING(str.c_str()); + return OR_CONTINUE; + } + + //0ADE=2,%2d% = text_by_GXT_entry %1d% + static OpcodeResult __stdcall opcode_0ADE(CRunningScript* thread) + { + OPCODE_READ_PARAM_STRING_LEN(gxt, 7); // GXT labels can be max 7 character long + + auto txt = textManager.Get(gxt); + + if (IsVarString(thread->PeekDataType())) + { + OPCODE_WRITE_PARAM_STRING(txt); + } + else + { + OPCODE_WRITE_PARAM_PTR(txt); // address of the text + } + return OR_CONTINUE; + } + + //0ADF=2,add_dynamic_GXT_entry %1d% text %2d% + static OpcodeResult __stdcall opcode_0ADF(CRunningScript* thread) + { + OPCODE_READ_PARAM_STRING_LEN(gxt, 7); // GXT labels can be max 7 character long + OPCODE_READ_PARAM_STRING(txt); + + textManager.AddFxt(gxt, txt); + return OR_CONTINUE; + } + + //0AE0=1,remove_dynamic_GXT_entry %1d% + static OpcodeResult __stdcall opcode_0AE0(CRunningScript* 
thread) + { + OPCODE_READ_PARAM_STRING_LEN(gxt, 7); // GXT labels can be max 7 character long + + textManager.RemoveFxt(gxt); + return OR_CONTINUE; + } + + //0AED=3,%3d% = float %1d% to_string_format %2d% + static OpcodeResult __stdcall opcode_0AED(CRunningScript* thread) + { + // this opcode is useless now + auto val = OPCODE_READ_PARAM_FLOAT(); + OPCODE_READ_PARAM_STRING(format); + + char result[32]; + sprintf_s(result, sizeof(result), format, val); + + OPCODE_WRITE_PARAM_STRING(result); + return OR_CONTINUE; + } + + //2600=1, is_text_empty %1s% + static OpcodeResult __stdcall opcode_2600(CRunningScript* thread) + { + OPCODE_READ_PARAM_STRING(str); + + OPCODE_CONDITION_RESULT(str[0] == '\0'); + return OR_CONTINUE; + } + + //2601=3, is_text_equal %1s% another %2s% ignore_case %3d% + static OpcodeResult __stdcall opcode_2601(CRunningScript* thread) + { + OPCODE_READ_PARAM_STRING(a); + OPCODE_READ_PARAM_STRING(b); + auto ignoreCase = OPCODE_READ_PARAM_BOOL(); + + auto result = ignoreCase ? _stricmp(a, b) : strcmp(a, b); + + OPCODE_CONDITION_RESULT(result == 0); + return OR_CONTINUE; + } + + //2602=3, is_text_in_text %1s% sub_text %2s% ignore_case %3d% + static OpcodeResult __stdcall opcode_2602(CRunningScript* thread) + { + OPCODE_READ_PARAM_STRING(str); + OPCODE_READ_PARAM_STRING(substr); + auto ignoreCase = OPCODE_READ_PARAM_BOOL(); + + if (substr[0] == '\0') + { + OPCODE_CONDITION_RESULT(true); + return OR_CONTINUE; + } + + auto result = ignoreCase ? StrStrIA(str, substr) : strstr(str, substr); + + OPCODE_CONDITION_RESULT(result != nullptr); + return OR_CONTINUE; + } + + //2603=3, is_text_prefix %1s% prefix %2s% ignore_case %3d% + static OpcodeResult __stdcall opcode_2603(CRunningScript* thread) + { + OPCODE_READ_PARAM_STRING(str); + OPCODE_READ_PARAM_STRING(prefix); + auto ignoreCase = OPCODE_READ_PARAM_BOOL(); + + auto prefixLen = strlen(prefix); + auto result = ignoreCase ? 
_strnicmp(str, prefix, prefixLen) : strncmp(str, prefix, prefixLen); + + OPCODE_CONDITION_RESULT(result == 0); + return OR_CONTINUE; + } + + //2604=3, is_text_sufix %1s% sufix %2s% ignore_case %3d% + static OpcodeResult __stdcall opcode_2604(CRunningScript* thread) + { + OPCODE_READ_PARAM_STRING(str); + OPCODE_READ_PARAM_STRING(sufix); + auto ignoreCase = OPCODE_READ_PARAM_BOOL(); + + auto strLen = strlen(str); + auto sufixLen = strlen(sufix); + + if (sufixLen > strLen) + { + OPCODE_CONDITION_RESULT(false); + return OR_CONTINUE; + } + + auto offset = strLen - sufixLen; + auto result = ignoreCase ? _stricmp(str + offset, sufix) : strcmp(str + offset, sufix); + + OPCODE_CONDITION_RESULT(result == 0); + return OR_CONTINUE; + } +} textInstance; + +CTextManager Text::textManager; +char Text::msgBuffLow[MAX_STR_LEN + 1]; +char Text::msgBuffHigh[MAX_STR_LEN + 1]; +char Text::msgBuffBig[MsgBigStyleCount][MAX_STR_LEN + 1]; diff --git a/cleo_plugins/Text/Text.vcxproj b/cleo_plugins/Text/Text.vcxproj new file mode 100644 index 00000000..785163b2 --- /dev/null +++ b/cleo_plugins/Text/Text.vcxproj @@ -0,0 +1,139 @@ + + + + + Release + Win32 + + + Debug + Win32 + + + + {bd19aefd-626b-40ae-8d83-6d444d2efbf8} + true + Win32Proj + Text + 10.0 + + + + DynamicLibrary + false + MultiByte + v143 + true + + + DynamicLibrary + true + MultiByte + v143 + + + + + + + + + + + + + $(SolutionDir).output\ + $(ProjectDir).obj\$(Configuration)\ + SA.Text + .cleo + + + $(SolutionDir).output\ + $(ProjectDir).obj\$(Configuration)\ + SA.Text + .cleo + + + $(GTA_SA_DIR)\gta_sa.exe + $(GTA_SA_DIR) + false + WindowsLocalDebugger + + + + Level3 + MaxSpeed + true + true + true + MultiThreaded + $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared;$(PLUGIN_SDK_DIR)\shared\game;$(SolutionDir)..\cleo_sdk + _NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San 
Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions);TARGET_NAME=R"($(TargetName))" + /Zc:threadSafeInit- %(AdditionalOptions) + stdcpp17 + + + true + true + true + UseLinkTimeCodeGeneration + $(PLUGIN_SDK_DIR)\output\lib\;$(SolutionDir)..\cleo_sdk;%(AdditionalLibraryDirectories) + cleo.lib;Shlwapi.lib;%(AdditionalDependencies) + Windows + + + taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" +if defined GTA_SA_DIR ( + xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" +) + + + + + Level3 + Disabled + true + MultiThreadedDebug + $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared;$(PLUGIN_SDK_DIR)\shared\game;$(SolutionDir)..\cleo_sdk + _DEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions);TARGET_NAME=R"($(TargetName))" + /Zc:threadSafeInit- %(AdditionalOptions) + stdcpp17 + + + true + Default + $(PLUGIN_SDK_DIR)\output\lib\;$(SolutionDir)..\cleo_sdk;%(AdditionalLibraryDirectories) + cleo.lib;Shlwapi.lib;%(AdditionalDependencies) + Windows + + + taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" +if defined GTA_SA_DIR ( +xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" +) + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/cleo_plugins/Text/Text.vcxproj.filters b/cleo_plugins/Text/Text.vcxproj.filters new file mode 100644 index 00000000..4c8f5334 --- /dev/null +++ b/cleo_plugins/Text/Text.vcxproj.filters @@ -0,0 +1,53 @@ + + + + + + + plugin_sdk + + + plugin_sdk + + + plugin_sdk + + + plugin_sdk + + + + plugin_sdk + + + plugin_sdk + + + plugin_sdk + + + plugin_sdk + + + plugin_sdk + + + + + + cleo_sdk + + + cleo_sdk + + + + + + {ac453a2b-f585-4a63-8f3b-70646198f496} + + + 
// Lookup table for the byte-wise CRC-32 computation (reflected polynomial 0xEDB88320).
static const unsigned long crcTable[256] = {
    0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
    0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
    0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
    0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
    0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
    0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
    0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
    0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
    0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
    0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
    0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
    0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
    0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
    0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
    0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
    0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
    0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
    0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
    0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
    0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
    0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
    0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
    0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
    0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
    0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
    0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
    0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
    0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
    0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
    0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
    0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
    0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
    0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
    0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
    0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
    0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
    0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
    0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
    0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
    0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
    0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
    0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
    0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
    0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
    0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
    0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
    0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
    0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
    0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
    0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
    0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
    0x2d02ef8dUL
};

// Checksum of a null-terminated string with every character upper-cased first,
// so "abc" and "ABC" produce the same value.
// Starts from 0xFFFFFFFF and returns the running value without a final XOR.
unsigned long crc32FromUpcaseString(const char *str)
{
    unsigned long crc = 0xFFFFFFFF;
    for (; *str; ++str)
    {
        // toupper() is only defined for values representable as unsigned char (or EOF).
        // A plain char may be negative for non-ASCII bytes, so convert it first.
        const auto upper = static_cast<unsigned char>(toupper(static_cast<unsigned char>(*str)));
        crc = crcTable[(crc ^ upper) & 0xff] ^ (crc >> 8);
    }
    return crc;
}

// Same as crc32FromUpcaseString, for std::string. Processing stops at the first
// embedded null character because the data is passed on as a C string.
unsigned long crc32FromUpcaseStdString(const std::string& str)
{
    return crc32FromUpcaseString(str.c_str());
}

// Case-sensitive checksum of a null-terminated string.
unsigned long crc32FromString(const char *str)
{
    unsigned long crc = 0xFFFFFFFF;
    for (; *str; ++str)
        crc = crcTable[(crc ^ static_cast<unsigned char>(*str)) & 0xff] ^ (crc >> 8);
    return crc;
}

// Same as crc32FromString, for std::string. Processing stops at the first
// embedded null character because the data is passed on as a C string.
unsigned long crc32FromStdString(const std::string& str)
{
    return crc32FromString(str.c_str());
}

// Checksum of 'len' raw bytes starting at 'buf'. Returns 0xFFFFFFFF for len == 0.
unsigned long crc32(const unsigned char *buf, unsigned long len)
{
    unsigned long crc = 0xFFFFFFFF;
    for (unsigned long i = 0; i < len; i++) // counter type matches 'len' so it cannot wrap before reaching it
        crc = crcTable[(crc ^ buf[i]) & 0xFF] ^ (crc >> 8);
    return crc;
}
Returns pointer to the result buffer or nullptr on fail LPCSTR WINAPI CLEO_ReadStringPointerOpcodeParam(CRunningScript* thread, char* buff = nullptr, int buffSize = 0); // read always null-terminated string into buffer, clamped to its size. If no buffer provided then internal, globally shared by all CLEO_ReadStringPointerOpcodeParam calls, is used. WARNING: returned pointer may differ from buff and contain string longer than buffSize (ptr to original data source) -void WINAPI CLEO_ReadStringParamWriteBuffer(CRunningScript* thread, char** outBuf, int* outBufSize, DWORD* outNeedsTerminator); // get info about the string opcode param, so it can be written latter. If outNeedsTerminator is not 0 then whole bufSize can be used as text characters. Advances script to next param +void WINAPI CLEO_ReadStringParamWriteBuffer(CRunningScript* thread, char** outBuf, int* outBufSize, BOOL* outNeedsTerminator); // get info about the string opcode param, so it can be written later. If outNeedsTerminator is 0 then whole bufSize can be used as text characters, otherwise one character must be left for the null terminator. Advances script to next param char* WINAPI CLEO_ReadParamsFormatted(CRunningScript* thread, const char* format, char* buf = nullptr, int bufSize = 0); // consumes all var-arg params and terminator // get param value without advancing the script DWORD WINAPI CLEO_PeekIntOpcodeParam(CRunningScript* thread); @@ -528,7 +528,11 @@ DWORD WINAPI CLEO_GetScriptTextureById(CRunningScript* thread, int id); // ret R DWORD WINAPI CLEO_GetInternalAudioStream(CRunningScript* thread, DWORD stream); // arg CAudioStream * +// Should be always used when working with files.
Provides ModLoader compatibility void WINAPI CLEO_ResolvePath(CRunningScript* thread, char* inOutPath, DWORD pathMaxLen); // convert to absolute (file system) path +struct DirectoryList{ DWORD count; char** paths; }; +DirectoryList WINAPI CLEO_ListDirectory(CRunningScript* thread, const char* searchPath, BOOL listDirs, BOOL listFiles); // thread can be null, searchPath can contain wildcards. After use CLEO_ListDirectoryFree must be called on returned DirectoryList to free allocated resources +void WINAPI CLEO_ListDirectoryFree(DirectoryList list); // releases resources allocated by CLEO_ListDirectory void WINAPI CLEO_Log(eLogLevel level, const char* msg); // add message to log diff --git a/cleo_sdk/CLEO_Utils.h b/cleo_sdk/CLEO_Utils.h index b55413aa..b48efeb2 100644 --- a/cleo_sdk/CLEO_Utils.h +++ b/cleo_sdk/CLEO_Utils.h @@ -34,15 +34,19 @@ namespace CLEO OPCODE_READ_PARAM_ANY32() // get raw data of any simple-type value (practically integers and floats) OPCODE_READ_PARAM_STRING(varName) // reads param and creates const char* variable named 'varName' with pointer to null-terminated string OPCODE_READ_PARAM_STRING_LEN(varName, maxLength) // same as above, but text length is clamped to maxLength + OPCODE_READ_PARAM_STRING_FORMATTED(varName) // reads "format" string argument, then all var-args. Creates variable named 'varName' containing formatted text. Creates 'varNameOk' variable with pointer to the text, or nullptr if user provided invalid arguments + OPCODE_READ_PARAMS_FORMATTED(format, varName) // reads all var-args and tries to put them into formatted string. Creates variable named 'varName' containing formatted text. 
Creates 'varNameOk' variable with pointer to the text, or nullptr if user provided invalid arguments OPCODE_READ_PARAM_FILEPATH(varName) // reads param and creates const char* variable named 'varName' with pointer to resolved, null-terminated, filepath OPCODE_READ_PARAM_PTR() // read and validate memory address argument OPCODE_READ_PARAM_OBJECT_HANDLE() // read and validate game object handle OPCODE_READ_PARAM_PED_HANDLE() // read and validate character (ped/actor) handle OPCODE_READ_PARAM_VEHICLE_HANDLE() // read and validate vehicle handle + // for opcodes with mixed params order, where 'strore_to' occurs before input arguments OPCODE_READ_PARAM_OUTPUT_VAR_INT() // get pointer to integer variable param to write result later OPCODE_READ_PARAM_OUTPUT_VAR_FLOAT() // get pointer to float variable param to write result later OPCODE_READ_PARAM_OUTPUT_VAR_ANY32() // get pointer to simple-type variable param to write result later + OPCODE_READ_PARAM_OUTPUT_VAR_STRING() // returns instance of StringParamBufferInfo used to write string param later // writing opcode output/result data OPCODE_WRITE_PARAM_BOOL(value) @@ -55,6 +59,7 @@ namespace CLEO OPCODE_WRITE_PARAM_FLOAT(value) OPCODE_WRITE_PARAM_ANY32(value) // write raw data into simple-type variable (practically integers and floats) OPCODE_WRITE_PARAM_STRING(value) + OPCODE_WRITE_PARAM_STRING_INFO(info, value) // write param using info object revceived from OPCODE_READ_PARAM_OUTPUT_VAR_STRING OPCODE_WRITE_PARAM_PTR(value) // memory address */ @@ -206,6 +211,38 @@ namespace CLEO return info; } + struct StringParamBufferInfo + { + char* data = nullptr; + int size = 0; + BOOL needTerminator = false; + }; + + static void MemPatchJump(size_t position, void* jumpTarget) + { + DWORD oldProtect; + VirtualProtect((LPVOID)position, 5, PAGE_EXECUTE_READWRITE, &oldProtect); + + *(BYTE*)position = 0xE9; // asm: jmp + position += sizeof(BYTE); + + *(DWORD*)position = (DWORD)jumpTarget - position - 4; + } + + static void* 
MemPatchCall(size_t position, void* newFunction) + { + DWORD oldProtect; + VirtualProtect((LPVOID)position, 5, PAGE_EXECUTE_READWRITE, &oldProtect); + + *(BYTE*)position = 0xE8; // asm: call + position += sizeof(BYTE); + + DWORD original = *(DWORD*)position + position + 4; + *(DWORD*)position = (DWORD)newFunction - position - 4; + + return (void*)original; + } + #define TRACE(format,...) {CLEO::Trace(CLEO::eLogLevel::Default, format, __VA_ARGS__);} #define LOG_WARNING(script, format, ...) {CLEO::Trace(script, CLEO::eLogLevel::Error, format, __VA_ARGS__);} #define SHOW_ERROR(a,...) {CLEO::ShowError(a, __VA_ARGS__);} @@ -254,6 +291,16 @@ namespace CLEO return CLEO_GetPointerToScriptVariable(thread); } + static StringParamBufferInfo _readParamStringInfo(CRunningScript* thread) + { + _lastParamType = thread->PeekDataType(); + _lastParamArrayType = IsArray(_lastParamType) ? thread->PeekArrayDataType() : eArrayDataType::ADT_NONE; + + StringParamBufferInfo result; + CLEO_ReadStringParamWriteBuffer(thread, &result.data, &result.size, &result.needTerminator); + return result; + } + static void _writeParamPtr(CRunningScript* thread, void* valuePtr) { _lastParamType = thread->PeekDataType(); @@ -301,7 +348,7 @@ namespace CLEO } if (IsVarString(_lastParamType)) return true; - if (!output && IsImmString(_lastParamType)) return true; + if (/*!output &&*/ IsImmString(_lastParamType)) return true; // allow writing strings into const addresses // pointer to output buffer if (IsVariable(_lastParamType)) return true; @@ -340,6 +387,40 @@ namespace CLEO return str; } + static bool _writeParamText(CLEO::CRunningScript* thread, const StringParamBufferInfo& target, const char* str) + { + if (str != nullptr && (size_t)str <= MinValidAddress) + { + SHOW_ERROR("Invalid '0x%X' source pointer of output string argument %s in script %s \nScript suspended.", str, GetParamInfo(1).c_str(), ScriptInfoStr(thread).c_str()); + thread->Suspend(); + return false; + } + + if ((size_t)target.data <= 
MinValidAddress) + { + SHOW_ERROR("Invalid '0x%X' target pointer of output string argument in script %s \nScript suspended.", str, ScriptInfoStr(thread).c_str()); + thread->Suspend(); + return false; + } + + if (target.size == 0) + { + return true; // done + } + + bool addTerminator = target.needTerminator; + size_t buffLen = target.size - addTerminator; + size_t length = str == nullptr ? 0 : strlen(str); + + if (buffLen > length) addTerminator = true; // there is space left for terminator + + length = min(length, buffLen); + if (length > 0) std::memcpy(target.data, str, length); + if (addTerminator) target.data[length] = '\0'; + + return true; + } + static bool _writeParamText(CRunningScript* thread, const char* str) { _lastParamType = thread->PeekDataType(); @@ -371,33 +452,9 @@ namespace CLEO } } - char* buff = nullptr; - int size = 0; - DWORD needTerminator = false; - CLEO_ReadStringParamWriteBuffer(thread, &buff, &size, &needTerminator); - - if (buff == nullptr) // all error types already handled, but check just in case - { - SHOW_ERROR("Invalid output argument %s in script %s\nScript suspended.", GetParamInfo().c_str(), ScriptInfoStr(thread).c_str()); - thread->Suspend(); - return false; - } - - if (size == 0) - { - return true; // done - } - - bool addTerminator = needTerminator; - size_t buffLen = size - addTerminator; - size_t length = str == nullptr ? 
0 : strlen(str); - - if (buffLen > length) addTerminator = true; // there is space left for terminator - - length = min(length, buffLen); - if (length > 0) std::memcpy(buff, str, length); - if (addTerminator) buff[length] = '\0'; - return true; // done + StringParamBufferInfo info; + CLEO_ReadStringParamWriteBuffer(thread, &info.data, &info.size, &info.needTerminator); + return _writeParamText(thread, info, str); // done } #define OPCODE_SKIP_PARAMS(_count) CLEO_SkipOpcodeParams(thread, _count) @@ -436,6 +493,11 @@ namespace CLEO #define OPCODE_READ_PARAM_STRING_LEN(_varName, _maxLen) char _buff_##_varName[_maxLen + 1]; const char* ##_varName = _readParamText(thread, _buff_##_varName, _maxLen + 1); if(##_varName != nullptr) ##_varName = _buff_##_varName; if(!_paramWasString()) { return OpcodeResult::OR_INTERRUPT; } + #define OPCODE_READ_PARAM_STRING_FORMATTED(_varName) char _buff_format_##_varName[MAX_STR_LEN + 1]; const char* _format_##_varName = _readParamText(thread, _buff_format_##_varName, MAX_STR_LEN + 1); if(!_paramWasString()) { return OpcodeResult::OR_INTERRUPT; } \ + char _varName[2 * MAX_STR_LEN + 1]; char* _varName##Ok = CLEO_ReadParamsFormatted(thread, _buff_format_##_varName, _varName, sizeof(_varName)); + + #define OPCODE_READ_PARAMS_FORMATTED(_format, _varName) char _varName[2 * MAX_STR_LEN + 1]; char* _varName##Ok = CLEO_ReadParamsFormatted(thread, _format, _varName, sizeof(_varName)); + #define OPCODE_READ_PARAM_FILEPATH(_varName) char _buff_##_varName[512]; const char* ##_varName = _readParamText(thread, _buff_##_varName, 512); if(##_varName != nullptr) ##_varName = _buff_##_varName; if(_paramWasString()) CLEO_ResolvePath(thread, _buff_##_varName, 512); else return OpcodeResult::OR_INTERRUPT; #define OPCODE_READ_PARAM_PTR() _readParam(thread).pParam; \ @@ -465,37 +527,43 @@ namespace CLEO if (!_paramWasVariable()) { SHOW_ERROR("Output argument %s expected to be variable float, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), 
CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ if (!IsLegacyScript(thread) && !_paramWasFloat(true)) { SHOW_ERROR("Output argument %s expected to be variable float, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + #define OPCODE_READ_PARAM_OUTPUT_VAR_STRING() _readParamStringInfo(thread); \ + if (!_paramWasString(true)) { SHOW_ERROR("Output argument %s expected to be variable string, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + + // macros for writing opcode output params. Performs type validation, throws error and suspends script if user provided invalid argument type - #define OPCODE_WRITE_PARAM_BOOL(value) _writeParam(thread, value); \ + #define OPCODE_WRITE_PARAM_BOOL(_value) _writeParam(thread, _value); \ if (!_paramWasInt(true)) { SHOW_ERROR("Output argument %s expected to be variable int, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } - #define OPCODE_WRITE_PARAM_INT8(value) _writeParam(thread, value); \ + #define OPCODE_WRITE_PARAM_INT8(_value) _writeParam(thread, _value); \ if (!_paramWasInt(true)) { SHOW_ERROR("Output argument %s expected to be variable int, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } - #define OPCODE_WRITE_PARAM_UINT8(value) _writeParam(thread, value); \ + #define OPCODE_WRITE_PARAM_UINT8(_value) _writeParam(thread, _value); \ if (!_paramWasInt(true)) { SHOW_ERROR("Output argument %s expected to be variable int, got 
%s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } - #define OPCODE_WRITE_PARAM_INT16(value) _writeParam(thread, value); \ + #define OPCODE_WRITE_PARAM_INT16(_value) _writeParam(thread, _value); \ if (!_paramWasInt(true)) { SHOW_ERROR("Output argument %s expected to be variable int, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } - #define OPCODE_WRITE_PARAM_UINT16(value) _writeParam(thread, value); \ + #define OPCODE_WRITE_PARAM_UINT16(_value) _writeParam(thread, _value); \ if (!_paramWasInt(true)) { SHOW_ERROR("Output argument %s expected to be variable int, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } - #define OPCODE_WRITE_PARAM_INT(value) _writeParam(thread, value); \ + #define OPCODE_WRITE_PARAM_INT(_value) _writeParam(thread, _value); \ if (!_paramWasInt(true)) { SHOW_ERROR("Output argument %s expected to be variable int, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } - #define OPCODE_WRITE_PARAM_UINT(value) _writeParam(thread, value); \ + #define OPCODE_WRITE_PARAM_UINT(_value) _writeParam(thread, _value); \ if (!_paramWasInt(true)) { SHOW_ERROR("Output argument %s expected to be variable int, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } - #define OPCODE_WRITE_PARAM_ANY32(value) _writeParam(thread, value); \ + #define OPCODE_WRITE_PARAM_ANY32(_value) _writeParam(thread, _value); \ if 
(!_paramWasInt(true) && !_paramWasFloat(true)) { SHOW_ERROR("Output argument %s expected to be int or float variable, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } - #define OPCODE_WRITE_PARAM_FLOAT(value) _writeParam(thread, value); \ + #define OPCODE_WRITE_PARAM_FLOAT(_value) _writeParam(thread, _value); \ if (!IsLegacyScript(thread) && !_paramWasFloat(true)) { SHOW_ERROR("Output argument %s expected to be variable float, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } - #define OPCODE_WRITE_PARAM_STRING(value) if(!_writeParamText(thread, value)) { return OpcodeResult::OR_INTERRUPT; } + #define OPCODE_WRITE_PARAM_STRING(_value) if(!_writeParamText(thread, _value)) { return OpcodeResult::OR_INTERRUPT; } + + #define OPCODE_WRITE_PARAM_VAR_STRING(_info, _value) if(!_writeParamText(thread, _info, _value)) { return OpcodeResult::OR_INTERRUPT; } - #define OPCODE_WRITE_PARAM_PTR(value) _writeParamPtr(thread, (void*)value); \ + #define OPCODE_WRITE_PARAM_PTR(_value) _writeParamPtr(thread, (void*)_value); \ if (!_paramWasInt(true)) { SHOW_ERROR("Output argument %s expected to be variable int, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } } diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index e01dc548..c05fd2c8 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -3,12 +3,14 @@ #include "CGameVersionManager.h" #include "CCustomOpcodeSystem.h" #include "ScmFunction.h" -#include "CTextManager.h" +#include "CCheat.h" #include "CModelInfo.h" #include #include #include +#include +#include #define 
OPCODE_VALIDATE_STR_ARG_WRITE(x) if((void*)x == nullptr) { SHOW_ERROR("%s in script %s \nScript suspended.", CCustomOpcodeSystem::lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return thread->Suspend(); } #define OPCODE_READ_FORMATTED_STRING(thread, buf, bufSize, format) if(ReadFormattedString(thread, buf, bufSize, format) == -1) { SHOW_ERROR("%s in script %s \nScript suspended.", CCustomOpcodeSystem::lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return thread->Suspend(); } @@ -23,55 +25,45 @@ namespace CLEO OpcodeResult __stdcall opcode_0051(CRunningScript * thread); // GOSUB return - OpcodeResult __stdcall opcode_0A92(CRunningScript *thread); - OpcodeResult __stdcall opcode_0A93(CRunningScript *thread); - OpcodeResult __stdcall opcode_0A94(CRunningScript *thread); - OpcodeResult __stdcall opcode_0A95(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AA0(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AA1(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AA9(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AB0(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AB1(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AB2(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AB3(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AB4(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AB5(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AB6(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AB7(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AB8(CRunningScript *thread); - OpcodeResult __stdcall opcode_0ABA(CRunningScript *thread); - OpcodeResult __stdcall opcode_0ABD(CRunningScript *thread); - OpcodeResult __stdcall opcode_0ABE(CRunningScript *thread); - OpcodeResult __stdcall opcode_0ABF(CRunningScript *thread); - OpcodeResult __stdcall opcode_0ACA(CRunningScript *thread); - OpcodeResult __stdcall opcode_0ACB(CRunningScript 
*thread); - OpcodeResult __stdcall opcode_0ACC(CRunningScript *thread); - OpcodeResult __stdcall opcode_0ACD(CRunningScript *thread); - OpcodeResult __stdcall opcode_0ACE(CRunningScript *thread); - OpcodeResult __stdcall opcode_0ACF(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AD0(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AD1(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AD2(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AD3(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AD4(CRunningScript *thread); - OpcodeResult __stdcall opcode_0ADB(CRunningScript *thread); - OpcodeResult __stdcall opcode_0ADC(CRunningScript *thread); - OpcodeResult __stdcall opcode_0ADD(CRunningScript *thread); - OpcodeResult __stdcall opcode_0ADE(CRunningScript *thread); - OpcodeResult __stdcall opcode_0ADF(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AE0(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AE1(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AE2(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AE3(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AED(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AEE(CRunningScript *thread); - OpcodeResult __stdcall opcode_0AEF(CRunningScript *thread); + OpcodeResult __stdcall opcode_0A92(CRunningScript* thread); // stream_custom_script + OpcodeResult __stdcall opcode_0A93(CRunningScript* thread); // terminate_this_custom_script + OpcodeResult __stdcall opcode_0A94(CRunningScript* thread); // load_and_launch_custom_mission + OpcodeResult __stdcall opcode_0A95(CRunningScript* thread); // save_this_custom_script + OpcodeResult __stdcall opcode_0AA0(CRunningScript* thread); // gosub_if_false + OpcodeResult __stdcall opcode_0AA1(CRunningScript* thread); // return_if_false + OpcodeResult __stdcall opcode_0AA9(CRunningScript* thread); // is_game_version_original + OpcodeResult __stdcall opcode_0AB0(CRunningScript* thread); // 
is_key_pressed + OpcodeResult __stdcall opcode_0AB1(CRunningScript* thread); // cleo_call + OpcodeResult __stdcall opcode_0AB2(CRunningScript* thread); // cleo_return + OpcodeResult __stdcall opcode_0AB3(CRunningScript* thread); // set_cleo_shared_var + OpcodeResult __stdcall opcode_0AB4(CRunningScript* thread); // get_cleo_shared_var + OpcodeResult __stdcall opcode_0AB5(CRunningScript* thread); // store_closest_entities + OpcodeResult __stdcall opcode_0AB6(CRunningScript* thread); // get_target_blip_coords + OpcodeResult __stdcall opcode_0AB7(CRunningScript* thread); // get_car_number_of_gears + OpcodeResult __stdcall opcode_0AB8(CRunningScript* thread); // get_car_current_gear + OpcodeResult __stdcall opcode_0ABA(CRunningScript* thread); // terminate_all_custom_scripts_with_this_name + OpcodeResult __stdcall opcode_0ABD(CRunningScript* thread); // is_car_siren_on + OpcodeResult __stdcall opcode_0ABE(CRunningScript* thread); // is_car_engine_on + OpcodeResult __stdcall opcode_0ABF(CRunningScript* thread); // cleo_set_car_engine_on + + OpcodeResult __stdcall opcode_0AD2(CRunningScript* thread); // get_char_player_is_targeting + + OpcodeResult __stdcall opcode_0ADC(CRunningScript* thread); // test_cheat + OpcodeResult __stdcall opcode_0ADD(CRunningScript* thread); // spawn_vehicle_by_cheating + + OpcodeResult __stdcall opcode_0AE1(CRunningScript* thread); // get_random_char_in_sphere_no_save_recursive + OpcodeResult __stdcall opcode_0AE2(CRunningScript* thread); // get_random_car_in_sphere_no_save_recursive + OpcodeResult __stdcall opcode_0AE3(CRunningScript* thread); // get_random_object_in_sphere_no_save_recursive + + OpcodeResult __stdcall opcode_0AEE(CRunningScript* thread); // pow + OpcodeResult __stdcall opcode_0AEF(CRunningScript* thread); // log OpcodeResult __stdcall opcode_0DD5(CRunningScript* thread); // get_platform // 2000 free slot // 2001 free slot OpcodeResult __stdcall opcode_2002(CRunningScript* thread); // cleo_return_with OpcodeResult __stdcall 
opcode_2003(CRunningScript* thread); // cleo_return_fail + typedef void(*FuncScriptDeleteDelegateT) (CRunningScript *script); struct ScriptDeleteDelegate { std::vector funcs; @@ -119,7 +111,6 @@ namespace CLEO CHandling * Handling; CPlayerPed * (__cdecl * GetPlayerPed)(DWORD); - CBaseModelInfo **Models; void(__cdecl * SpawnCar)(DWORD); @@ -249,27 +240,12 @@ namespace CLEO CLEO_RegisterOpcode(0x0ABD, opcode_0ABD); CLEO_RegisterOpcode(0x0ABE, opcode_0ABE); CLEO_RegisterOpcode(0x0ABF, opcode_0ABF); - CLEO_RegisterOpcode(0x0ACA, opcode_0ACA); - CLEO_RegisterOpcode(0x0ACB, opcode_0ACB); - CLEO_RegisterOpcode(0x0ACC, opcode_0ACC); - CLEO_RegisterOpcode(0x0ACD, opcode_0ACD); - CLEO_RegisterOpcode(0x0ACE, opcode_0ACE); - CLEO_RegisterOpcode(0x0ACF, opcode_0ACF); - CLEO_RegisterOpcode(0x0AD0, opcode_0AD0); - CLEO_RegisterOpcode(0x0AD1, opcode_0AD1); CLEO_RegisterOpcode(0x0AD2, opcode_0AD2); - CLEO_RegisterOpcode(0x0AD3, opcode_0AD3); - CLEO_RegisterOpcode(0x0AD4, opcode_0AD4); - CLEO_RegisterOpcode(0x0ADB, opcode_0ADB); CLEO_RegisterOpcode(0x0ADC, opcode_0ADC); CLEO_RegisterOpcode(0x0ADD, opcode_0ADD); - CLEO_RegisterOpcode(0x0ADE, opcode_0ADE); - CLEO_RegisterOpcode(0x0ADF, opcode_0ADF); - CLEO_RegisterOpcode(0x0AE0, opcode_0AE0); CLEO_RegisterOpcode(0x0AE1, opcode_0AE1); CLEO_RegisterOpcode(0x0AE2, opcode_0AE2); CLEO_RegisterOpcode(0x0AE3, opcode_0AE3); - CLEO_RegisterOpcode(0x0AED, opcode_0AED); CLEO_RegisterOpcode(0x0AEE, opcode_0AEE); CLEO_RegisterOpcode(0x0AEF, opcode_0AEF); @@ -308,7 +284,6 @@ namespace CLEO FindGroundZ = gvm.TranslateMemoryAddress(MA_FIND_GROUND_Z_FUNCTION); GetPlayerPed = gvm.TranslateMemoryAddress(MA_GET_PLAYER_PED_FUNCTION); Handling = gvm.TranslateMemoryAddress(MA_HANDLING); - Models = gvm.TranslateMemoryAddress(MA_MODELS); SpawnCar = gvm.TranslateMemoryAddress(MA_SPAWN_CAR_FUNCTION); // TODO: consider version-agnostic code @@ -529,12 +504,10 @@ namespace CLEO char* outIter = outputStr; char bufa[MAX_STR_LEN + 1], fmtbufa[64], *fmta; - 
CCustomOpcodeSystem::lastErrorMsg.clear(); - // invalid input arguments if(outputStr == nullptr || len == 0) { - CCustomOpcodeSystem::lastErrorMsg = "Need target buffer to read formatted string"; + LOG_WARNING(thread, "ReadFormattedString invalid input arg(s)"); SkipUnusedVarArgs(thread); return -1; // error } @@ -688,7 +661,7 @@ namespace CLEO { _ReadFormattedString_OutOfMemory: // jump here on error - CCustomOpcodeSystem::lastErrorMsg = stringPrintf("Target buffer too small (%d) to read whole formatted string", len); + LOG_WARNING(thread, "Target buffer too small (%d) to read whole formatted string in script %s", len, ((CCustomScript*)thread)->GetInfoStr().c_str()); SkipUnusedVarArgs(thread); outputStr[len - 1] = '\0'; return -1; // error @@ -697,8 +670,7 @@ namespace CLEO // still more var-args available if (thread->PeekDataType() != DT_END) { - CCustomOpcodeSystem::lastErrorMsg = "More params than slots in formatted string"; - LOG_WARNING(thread, "%s in script %s", CCustomOpcodeSystem::lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); + LOG_WARNING(thread, "More params than slots in formatted string in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); } SkipUnusedVarArgs(thread); // skip terminator too @@ -706,7 +678,7 @@ namespace CLEO return (int)written; _ReadFormattedString_ArgMissing: // jump here on error - CCustomOpcodeSystem::lastErrorMsg = "Less params than slots in formatted string"; + LOG_WARNING(thread, "Less params than slots in formatted string in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); thread->IncPtr(); // skip vararg terminator outputStr[written] = '\0'; return -1; // error @@ -1349,89 +1321,6 @@ namespace CLEO return OR_CONTINUE; } - //0ACA=1,show_text_box %1d% - OpcodeResult __stdcall opcode_0ACA(CRunningScript *thread) - { - OPCODE_READ_PARAM_STRING(text); - PrintHelp(text); - return OR_CONTINUE; - } - - //0ACB=3,show_styled_text %1d% time %2d% style %3d% - OpcodeResult __stdcall 
opcode_0ACB(CRunningScript *thread) - { - OPCODE_READ_PARAM_STRING(text); - auto time = OPCODE_READ_PARAM_INT(); - auto style = OPCODE_READ_PARAM_INT(); - - PrintBig(text, time, style); - return OR_CONTINUE; - } - - //0ACC=2,show_text_lowpriority %1d% time %2d% - OpcodeResult __stdcall opcode_0ACC(CRunningScript *thread) - { - OPCODE_READ_PARAM_STRING(text); - auto time = OPCODE_READ_PARAM_INT(); - - Print(text, time); - return OR_CONTINUE; - } - - //0ACD=2,show_text_highpriority %1d% time %2d% - OpcodeResult __stdcall opcode_0ACD(CRunningScript *thread) - { - OPCODE_READ_PARAM_STRING(text); - auto time = OPCODE_READ_PARAM_INT(); - - PrintNow(text, time); - return OR_CONTINUE; - } - - //0ACE=-1,show_formatted_text_box %1d% - OpcodeResult __stdcall opcode_0ACE(CRunningScript *thread) - { - OPCODE_READ_PARAM_STRING(format); - char text[MAX_STR_LEN]; OPCODE_READ_FORMATTED_STRING(thread, text, sizeof(text), format) - - PrintHelp(text); - return OR_CONTINUE; - } - - //0ACF=-1,show_formatted_styled_text %1d% time %2d% style %3d% - OpcodeResult __stdcall opcode_0ACF(CRunningScript *thread) - { - OPCODE_READ_PARAM_STRING(format); - auto time = OPCODE_READ_PARAM_INT(); - auto style = OPCODE_READ_PARAM_INT(); - char text[MAX_STR_LEN]; OPCODE_READ_FORMATTED_STRING(thread, text, sizeof(text), format) - - PrintBig(text, time, style); - return OR_CONTINUE; - } - - //0AD0=-1,show_formatted_text_lowpriority %1d% time %2d% - OpcodeResult __stdcall opcode_0AD0(CRunningScript *thread) - { - OPCODE_READ_PARAM_STRING(format); - auto time = OPCODE_READ_PARAM_INT(); - char text[MAX_STR_LEN]; OPCODE_READ_FORMATTED_STRING(thread, text, sizeof(text), format) - - Print(text, time); - return OR_CONTINUE; - } - - //0AD1=-1,show_formatted_text_highpriority %1d% time %2d% - OpcodeResult __stdcall opcode_0AD1(CRunningScript *thread) - { - OPCODE_READ_PARAM_STRING(format); - auto time = OPCODE_READ_PARAM_INT(); - char text[MAX_STR_LEN]; OPCODE_READ_FORMATTED_STRING(thread, text, sizeof(text), 
format) - - PrintNow(text, time); - return OR_CONTINUE; - } - //0AD2=2, %2d% = player %1d% targeted_actor //IF and SET OpcodeResult __stdcall opcode_0AD2(CRunningScript *thread) { @@ -1455,136 +1344,37 @@ namespace CLEO return OR_CONTINUE; } - //0AD3=-1,string %1d% format %2d% ... - OpcodeResult __stdcall opcode_0AD3(CRunningScript *thread) - { - auto resultArg = GetStringParamWriteBuffer(thread); OPCODE_VALIDATE_STR_ARG_WRITE(resultArg.data) - OPCODE_READ_PARAM_STRING(format); - char text[MAX_STR_LEN]; OPCODE_READ_FORMATTED_STRING(thread, text, sizeof(text), format) - - WriteStringParam(resultArg, text); - return OR_CONTINUE; - } - - //0AD4=-1,%3d% = scan_string %1d% format %2d% //IF and SET - OpcodeResult __stdcall opcode_0AD4(CRunningScript *thread) + //0ADC=1, test_cheat %1d% + OpcodeResult __stdcall opcode_0ADC(CRunningScript *thread) { - OPCODE_READ_PARAM_STRING(src); - OPCODE_READ_PARAM_STRING(format); - - auto resultType = thread->PeekDataType(); - if (!IsVariable(resultType) && IsVarString(resultType)) - { - SHOW_ERROR("Result parameter must be variable type, received '%s' in script %s \nScript suspended.", ToKindStr(resultType), ((CCustomScript*)thread)->GetInfoStr().c_str()); - return thread->Suspend(); - } - int *result = (int *)GetScriptParamPointer(thread); + OPCODE_READ_PARAM_STRING(text); - // read extra params - size_t cExParams = 0; - SCRIPT_VAR *ExParams[35]; - for (int i = 0; i < 35; i++) + auto len = strlen(text); + if (_strnicmp(text, CCheat::m_CheatString, len) == 0) { - auto paramType = thread->PeekDataType(); - if (paramType != DT_END) - { - ExParams[i] = GetScriptParamPointer(thread); - cExParams++; - } - else ExParams[i] = nullptr; // clear unused args + CCheat::m_CheatString[0] = '\0'; // consume the cheat + SetScriptCondResult(thread, true); + return OR_CONTINUE; } - SkipUnusedVarArgs(thread); // and var args terminator - - *result = sscanf(src, format, - /* extra parameters (will be aligned automatically, but the limit of 35 elements 
maximum exists) */ - ExParams[0], ExParams[1], ExParams[2], ExParams[3], ExParams[4], ExParams[5], - ExParams[6], ExParams[7], ExParams[8], ExParams[9], ExParams[10], ExParams[11], - ExParams[12], ExParams[13], ExParams[14], ExParams[15], ExParams[16], ExParams[17], - ExParams[18], ExParams[19], ExParams[20], ExParams[21], ExParams[22], ExParams[23], - ExParams[24], ExParams[25], ExParams[26], ExParams[27], ExParams[28], ExParams[29], - ExParams[30], ExParams[31], ExParams[32], ExParams[33], ExParams[34]); - - SetScriptCondResult(thread, cExParams == *result); - return OR_CONTINUE; - } - - //0ADB=2,%2d% = car_model %1d% name - OpcodeResult __stdcall opcode_0ADB(CRunningScript *thread) - { - DWORD modelIndex; *thread >> modelIndex; - - CVehicleModelInfo* model; - // if 1.0 US, prefer GetModelInfo function — makes it compatible with fastman92's limit adjuster - if (CLEO::GetInstance().VersionManager.GetGameVersion() == CLEO::GV_US10) - model = plugin::CallAndReturn(modelIndex); - else - model = reinterpret_cast(Models[modelIndex]); - - auto str = std::string(std::string_view(model->m_szGameName, sizeof(model->m_szGameName))); // to proper cstr - WriteStringParam(thread, str.c_str()); - return OR_CONTINUE; - } - //0ADC=1, test_cheat %1d% - OpcodeResult __stdcall opcode_0ADC(CRunningScript *thread) - { - OPCODE_READ_PARAM_STRING(text); - SetScriptCondResult(thread, TestCheat(text)); + SetScriptCondResult(thread, false); return OR_CONTINUE; } //0ADD=1,spawn_car_with_model %1o% at_player_location OpcodeResult __stdcall opcode_0ADD(CRunningScript *thread) { - DWORD mi; - *thread >> mi; + auto modelIndex = OPCODE_READ_PARAM_INT(); CVehicleModelInfo* model; // if 1.0 US, prefer GetModelInfo function — makes it compatible with fastman92's limit adjuster if (CLEO::GetInstance().VersionManager.GetGameVersion() == CLEO::GV_US10) { - model = plugin::CallAndReturn(mi); + model = plugin::CallAndReturn(modelIndex); } else { - model = reinterpret_cast(Models[mi]); + model = 
reinterpret_cast(CModelInfo::ms_modelInfoPtrs[modelIndex]); } - if (model->m_nVehicleType != VEHICLE_TYPE_TRAIN && model->m_nVehicleType != VEHICLE_TYPE_UNKNOWN) SpawnCar(mi); - return OR_CONTINUE; - } - - //0ADE=2,%2d% = text_by_GXT_entry %1d% - OpcodeResult __stdcall opcode_0ADE(CRunningScript *thread) - { - OPCODE_READ_PARAM_STRING_LEN(gxt, 7); // GXT labels can be max 7 character long - - auto txt = GetInstance().TextManager.Get(gxt); - - if (IsVarString(thread->PeekDataType())) - { - OPCODE_WRITE_PARAM_STRING(txt); - } - else - { - OPCODE_WRITE_PARAM_PTR(txt); // address of the text - } - return OR_CONTINUE; - } - - //0ADF=2,add_dynamic_GXT_entry %1d% text %2d% - OpcodeResult __stdcall opcode_0ADF(CRunningScript *thread) - { - OPCODE_READ_PARAM_STRING_LEN(gxt, 7); // GXT labels can be max 7 character long - OPCODE_READ_PARAM_STRING(txt); - - GetInstance().TextManager.AddFxt(gxt, txt); - return OR_CONTINUE; - } - - //0AE0=1,remove_dynamic_GXT_entry %1d% - OpcodeResult __stdcall opcode_0AE0(CRunningScript *thread) - { - OPCODE_READ_PARAM_STRING_LEN(gxt, 7); // GXT labels can be max 7 character long - - GetInstance().TextManager.RemoveFxt(gxt); + if (model->m_nVehicleType != VEHICLE_TYPE_TRAIN && model->m_nVehicleType != VEHICLE_TYPE_UNKNOWN) SpawnCar(modelIndex); return OR_CONTINUE; } @@ -1726,18 +1516,6 @@ namespace CLEO return OR_CONTINUE; } - //0AED=3,%3d% = float %1d% to_string_format %2d% - OpcodeResult __stdcall opcode_0AED(CRunningScript *thread) - { - // this opcode is useless now - auto val = OPCODE_READ_PARAM_FLOAT(); - OPCODE_READ_PARAM_STRING(format); - auto resultArg = GetStringParamWriteBuffer(thread); OPCODE_VALIDATE_STR_ARG_WRITE(resultArg.data) - - sprintf_s(resultArg.data, resultArg.size, format, val); - return OR_CONTINUE; - } - //0AEE=3,%3d% = %1d% exp %2d% //all floats OpcodeResult __stdcall opcode_0AEE(CRunningScript *thread) { @@ -1879,7 +1657,7 @@ extern "C" return ReadStringParam(thread, buff, buffSize); } - void WINAPI 
CLEO_ReadStringParamWriteBuffer(CLEO::CRunningScript* thread, char** outBuf, int* outBufSize, DWORD* outNeedsTerminator) + void WINAPI CLEO_ReadStringParamWriteBuffer(CLEO::CRunningScript* thread, char** outBuf, int* outBufSize, BOOL* outNeedsTerminator) { if (thread == nullptr || outBuf == nullptr || @@ -1971,7 +1749,6 @@ extern "C" if(ReadFormattedString(thread, buf, bufSize, format) == -1) // error? { - LOG_WARNING(thread, "%s in script %s", CCustomOpcodeSystem::lastErrorMsg.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return nullptr; // error } @@ -2158,6 +1935,92 @@ extern "C" std::memcpy(inOutPath, resolved.c_str(), resolved.length() + 1); // with terminator } + DirectoryList WINAPI CLEO_ListDirectory(CLEO::CRunningScript* thread, const char* searchPath, BOOL listDirs, BOOL listFiles) + { + DirectoryList result; + result.count = 0; + result.paths = nullptr; + + if (searchPath == nullptr) + { + return result; // invalid param + } + + if (!listDirs && !listFiles) + { + return result; // nothing to list, done + } + + // TODO: if available call ModLoader here instead + // scriptFileDir, scriptWorkDir, searchPath + + auto fsSearchPath = FS::path(searchPath); + if (!fsSearchPath.is_absolute()) + { + auto workDir = (thread != nullptr) ? 
+ ((CCustomScript*)thread)->GetWorkDir() : + Filepath_Root.c_str(); + + fsSearchPath = workDir / fsSearchPath; + } + + WIN32_FIND_DATA wfd = { 0 }; + HANDLE hSearch = FindFirstFile(searchPath, &wfd); + if (hSearch == INVALID_HANDLE_VALUE) + { + TRACE("No files found in: %s", searchPath); + return result; + } + + std::set found; + do + { + if (!listDirs && (wfd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) + { + continue; // skip directories + } + + if (!listFiles && !(wfd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) + { + continue; // skip files + } + + auto path = FS::path(wfd.cFileName); + if (!path.is_absolute()) // keep absolute in case somebody hooked the APIs to return so + path = fsSearchPath.parent_path() / path; + + found.insert(path.string()); + } + while (FindNextFile(hSearch, &wfd)); + + // create results list + result.paths = (char**)malloc(found.size() * sizeof(DWORD)); // array of pointers + + for(auto& path : found) + { + char* str = (char*)malloc(path.length() + 1); + strcpy(str, path.c_str()); + + result.paths[result.count] = str; + result.count++; + } + + return result; + } + + void WINAPI CLEO_ListDirectoryFree(DirectoryList list) + { + if (list.count > 0 && list.paths != nullptr) + { + for (DWORD i = 0; i < list.count; i++) + { + free(list.paths[i]); + } + + free(list.paths); + } + } + BOOL WINAPI CLEO_GetScriptDebugMode(const CLEO::CRunningScript* thread) { return reinterpret_cast(thread)->GetDebugMode(); diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index bdf098e2..1197aaf9 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -61,13 +61,6 @@ namespace CLEO }; extern void(__thiscall * ProcessScript)(CRunningScript*); - - struct StringParamBufferInfo - { - char* data = nullptr; - DWORD size = 0; - bool needTerminator = false; - }; // Read null-terminated string into the buffer // returns pointer to string or nullptr on fail diff --git a/source/CGameVersionManager.cpp 
b/source/CGameVersionManager.cpp index 663f01a7..e7da16de 100644 --- a/source/CGameVersionManager.cpp +++ b/source/CGameVersionManager.cpp @@ -85,28 +85,10 @@ namespace CLEO { 0x00BA86F0, memory_und, 0x00BA86F0, 0x00BAAD70, 0x00C36020 }, // MA_RADAR_BLIPS, { 0x00C2B9C8, memory_und, 0x00C2B9C8, 0x00C2E188, 0x00CAC1E0 }, // MA_HANDLING, { 0x0056E210, memory_und, 0x0056E210, 0x0056E6B0, 0x00563900 }, // MA_GET_PLAYER_PED_FUNCTION, - { 0x00A9B0C8, memory_und, 0x00A9B0C8, 0x00A9D748, 0x00B0FFD8 }, // MA_MODELS, { 0x0043A0B0, memory_und, 0x0043A0B0, 0x0043A136, 0x0043D3D0 }, // MA_SPAWN_CAR_FUNCTION, // GV_US10, GV_US11, GV_EU10, GV_EU11, GV_STEAM - { 0x00588BE0, memory_und, 0x00588BE0, 0x005893B0, 0x00596980 }, // MA_TEXT_BOX_FUNCTION, - { 0x0069F2B0, memory_und, 0x0069F2B0, 0x0069FAD0, 0x006CBF40 }, // MA_STYLED_TEXT_FUNCTION, - { 0x0069F0B0, memory_und, 0x0069F0B0, 0x0069F8D0, 0x006CBD50 }, // MA_TEXT_LOW_PRIORITY_FUNCTION, - { 0x0069F1E0, memory_und, 0x0069F1E0, 0x0069FA00, 0x006CBE80 }, // MA_TEXT_HIGH_PRIORITY_FUNCTION, - { 0x006A0000, memory_und, 0x006A0000, 0x006A0820, 0x006CCC90 }, // MA_CTEXT_TKEY_LOCATE_FUNCTION, - { 0x006A0050, memory_und, 0x006A0050, 0x006A0870, 0x006CCCE0 }, // MA_CALL_CTEXT_LOCATE, - { 0x00C1B340, memory_und, 0x00C1B340, 0x00C1DB00, 0x00946CC8 }, // MA_GAME_TEXTS, - { 0x00969110, memory_und, 0x00969110, 0x0096B790, 0x009DE3F8 }, // MA_CHEAT_STRING, - { 0x00B72910, memory_und, 0x00B72910, 0x00B74F90, 0x00BFF370 }, // MA_MPACK_NUMBER, - - // GV_US10, GV_US11, GV_EU10, GV_EU11, GV_STEAM - { 0x00745560, memory_und, 0x00745560, 0x00745D90, 0x0077F3A0 }, // MA_CREATE_MAIN_WINDOW_FUNCTION, { 0x007487A8, memory_und, 0x007487F8, 0x0074907C, 0x0078276D }, // MA_CALL_CREATE_MAIN_WINDOW, - { 0x00B6F028, memory_und, 0x00B6F028, 0x00B716A8, 0x00BFBBE0 }, // MA_CAMERA, - { 0x00B7CB48, memory_und, 0x00B7CB48, 0x00B7F1C8, 0x00C0F4F9 }, // MA_CODE_PAUSE, - { 0x00B7CB49, memory_und, 0x00B7CB49, 0x00B7F1C9, 0x00C0F4FA }, // MA_USER_PAUSE, - { 0x00C1703C, 
memory_und, 0x00C1703C, 0x00C197FC, 0x00CA3578 }, // MA_RW_CAMERA_PP, - { 0x00748454, memory_und, 0x007484A4, 0x00748D24, 0x0078240C }, // MA_DEF_WINDOW_PROC_PTR, }; eGameVersion DetermineGameVersion() diff --git a/source/CGameVersionManager.h b/source/CGameVersionManager.h index aa0a82fa..34867776 100644 --- a/source/CGameVersionManager.h +++ b/source/CGameVersionManager.h @@ -101,28 +101,9 @@ namespace CLEO MA_RADAR_BLIPS, MA_HANDLING, MA_GET_PLAYER_PED_FUNCTION, - MA_MODELS, MA_SPAWN_CAR_FUNCTION, - // TextManager - MA_TEXT_BOX_FUNCTION, - MA_STYLED_TEXT_FUNCTION, - MA_TEXT_LOW_PRIORITY_FUNCTION, - MA_TEXT_HIGH_PRIORITY_FUNCTION, - MA_CTEXT_TKEY_LOCATE_FUNCTION, - MA_CALL_CTEXT_LOCATE, - MA_GAME_TEXTS, - MA_CHEAT_STRING, - MA_MPACK_NUMBER, - - // SoundSystem - MA_CREATE_MAIN_WINDOW_FUNCTION, MA_CALL_CREATE_MAIN_WINDOW, - MA_CAMERA, - MA_CODE_PAUSE, - MA_USER_PAUSE, - MA_RW_CAMERA_PP, - MA_DEF_WINDOW_PROC_PTR, MA_TOTAL, }; diff --git a/source/CleoBase.cpp b/source/CleoBase.cpp index 961d13c9..ac794e7f 100644 --- a/source/CleoBase.cpp +++ b/source/CleoBase.cpp @@ -126,7 +126,6 @@ namespace CLEO CodeInjector.OpenReadWriteAccess(); // must do this earlier to ensure plugins write access on init GameMenu.Inject(CodeInjector); DmaFix.Inject(CodeInjector); - TextManager.Inject(CodeInjector); OpcodeSystem.Inject(CodeInjector); ScriptEngine.Inject(CodeInjector); @@ -168,8 +167,6 @@ namespace CLEO // execute registered callbacks GetInstance().CallCallbacks(eCallbackId::GameBegin, saveSlot); - - TextManager.LoadFxts(); } void CCleoInstance::GameEnd() @@ -181,7 +178,6 @@ namespace CLEO GetInstance().CallCallbacks(eCallbackId::GameEnd); // execute registered callbacks ScriptEngine.GameEnd(); OpcodeSystem.FinalizeScriptObjects(); - TextManager.Clear(); saveSlot = -1; } diff --git a/source/CleoBase.h b/source/CleoBase.h index e6f1b9d2..ef46daac 100644 --- a/source/CleoBase.h +++ b/source/CleoBase.h @@ -9,7 +9,6 @@ #include "CPluginSystem.h" #include "CScriptEngine.h" #include 
"CCustomOpcodeSystem.h" -#include "CTextManager.h" #include "FileEnumerator.h" #include "crc32.h" #include "OpcodeInfoDatabase.h" @@ -28,7 +27,6 @@ namespace CLEO CCodeInjector CodeInjector; CGameVersionManager VersionManager; CScriptEngine ScriptEngine; - CTextManager TextManager; CCustomOpcodeSystem OpcodeSystem; CModuleSystem ModuleSystem; CPluginSystem PluginSystem; diff --git a/source/FileEnumerator.h b/source/FileEnumerator.h index 05b61eb2..59593ce3 100644 --- a/source/FileEnumerator.h +++ b/source/FileEnumerator.h @@ -1,71 +1,20 @@ #pragma once +#include "..\cleo_sdk\CLEO.h" #include template void FilesWalk(const char* directory, const char* extension, T callback) { - /*try - { - for (auto& it : FS::directory_iterator(directory)) - { - if (it.is_regular_file()) - { - auto& filePath = it.path(); - - if (extension != nullptr) - { - if (_stricmp(filePath.extension().string().c_str(), extension) != 0) - { - continue; - } - } + std::string searchPath = directory; + if (searchPath.back() != '\\' && searchPath.back() != '/') searchPath.push_back('\\'); + searchPath += "*"; + searchPath += extension; - auto result = FS::absolute(filePath); - callback(result.string().c_str(), result.filename().string().c_str()); - } - } - } - catch (const std::exception& ex) + auto list = CLEO::CLEO_ListDirectory(nullptr, searchPath.c_str(), false, true); + for (DWORD i = 0; i < list.count; i++) { - TRACE("Error while iterating directory: %s", ex.what()); - }*/ - - // Re-implemented with raw search APIs for compatibility with ModLoader. 
- // The ModLoader should be updated anyway to solve potential file access problems in more advanced Cleo scripts - - std::string pattern = directory; - if(!pattern.empty() && pattern.back() != '\\') pattern.push_back('\\'); - - const size_t baseDirLen = pattern.length(); - - pattern.push_back('*'); - if (extension != nullptr) pattern.append(extension); - - WIN32_FIND_DATA wfd = { 0 }; - HANDLE hSearch = FindFirstFile(pattern.c_str(), &wfd); - - if (hSearch == INVALID_HANDLE_VALUE) - { - TRACE("No files found in: %s", pattern.c_str()); - return; + auto fsPath = FS::path(list.paths[i]); + callback(list.paths[i], fsPath.filename().string().c_str()); } - do - { - if (wfd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) - { - continue; // skip directories - } - - std::string path; - if (FS::path(wfd.cFileName).is_absolute()) - path = wfd.cFileName; // somebody hacked findFirstFile APIs and is providing us absolute path - else - path = pattern.substr(0, baseDirLen) + wfd.cFileName; // standard - - auto result = FS::weakly_canonical(path); // will use CWD if input path was relative! 
- callback(result.string().c_str(), result.filename().string().c_str()); - - } while (FindNextFile(hSearch, &wfd)); - - FindClose(hSearch); + CLEO::CLEO_ListDirectoryFree(list); } diff --git a/source/cleo.def b/source/cleo.def index a289abb9..1c1f8bbc 100644 --- a/source/cleo.def +++ b/source/cleo.def @@ -35,18 +35,20 @@ EXPORTS _CLEO_GetScriptInfoStr@16 @32 _CLEO_GetScriptParamInfoStr@12 @33 _CLEO_ResolvePath@12 @34 - _CLEO_GetScriptDebugMode@4 @35 - _CLEO_SetScriptDebugMode@8 @36 - _CLEO_Log@8 @37 - _CLEO_ReadStringParamWriteBuffer@16 @38 - _CLEO_GetOpcodeParamsArray@0 @39 - _CLEO_GetParamsHandledCount@0 @40 - _CLEO_PeekIntOpcodeParam@4 @41 - _CLEO_PeekFloatOpcodeParam@4 @42 - _CLEO_PeekPointerToScriptVariable@4 @43 - _CLEO_GetScriptByName@16 @44 - _CLEO_GetScriptByFilename@8 @45 - _CLEO_GetScriptFilename@4 @46 - _CLEO_GetScriptWorkDir@4 @47 - _CLEO_SetScriptWorkDir@8 @48 - _CLEO_RegisterCommand@8 @49 + _CLEO_ListDirectory@16 @35 + _CLEO_ListDirectoryFree@8 @36 + _CLEO_GetScriptDebugMode@4 @37 + _CLEO_SetScriptDebugMode@8 @38 + _CLEO_Log@8 @39 + _CLEO_ReadStringParamWriteBuffer@16 @40 + _CLEO_GetOpcodeParamsArray@0 @41 + _CLEO_GetParamsHandledCount@0 @42 + _CLEO_PeekIntOpcodeParam@4 @43 + _CLEO_PeekFloatOpcodeParam@4 @44 + _CLEO_PeekPointerToScriptVariable@4 @45 + _CLEO_GetScriptByName@16 @46 + _CLEO_GetScriptByFilename@8 @47 + _CLEO_GetScriptFilename@4 @48 + _CLEO_GetScriptWorkDir@4 @49 + _CLEO_SetScriptWorkDir@8 @50 + _CLEO_RegisterCommand@8 @51 diff --git a/tests/cleo_tests/FilesystemOperations/0A9A.txt b/tests/cleo_tests/FilesystemOperations/0A9A.txt index 0518c8de..d0ebcf50 100644 --- a/tests/cleo_tests/FilesystemOperations/0A9A.txt +++ b/tests/cleo_tests/FilesystemOperations/0A9A.txt @@ -1,5 +1,5 @@ {$CLEO .s} -{$INCLUDE_ONCE ../cleo_tester.txt} +{$INCLUDE_ONCE ../cleo_tester.inc} script_name "0A9A" // open_file debug_on diff --git a/tests/cleo_tests/MemoryOperations/0A8C.txt b/tests/cleo_tests/MemoryOperations/0A8C.txt index eab94080..d8b8f543 100644 --- 
a/tests/cleo_tests/MemoryOperations/0A8C.txt +++ b/tests/cleo_tests/MemoryOperations/0A8C.txt @@ -1,5 +1,5 @@ {$CLEO .s} -{$INCLUDE_ONCE ../cleo_tester.txt} +{$INCLUDE_ONCE ../cleo_tester.inc} script_name '0A8C' test("0A8C (write_memory)", @tests) diff --git a/tests/cleo_tests/MemoryOperations/0A8D.txt b/tests/cleo_tests/MemoryOperations/0A8D.txt index 1bc929b5..9e2857fa 100644 --- a/tests/cleo_tests/MemoryOperations/0A8D.txt +++ b/tests/cleo_tests/MemoryOperations/0A8D.txt @@ -1,5 +1,5 @@ {$CLEO .s} -{$INCLUDE_ONCE ../cleo_tester.txt} +{$INCLUDE_ONCE ../cleo_tester.inc} script_name '0A8D' test("0A8D (read_memory)", @tests) diff --git a/tests/cleo_tests/MemoryOperations/0A96.txt b/tests/cleo_tests/MemoryOperations/0A96.txt index dc5c20bb..2cc17b0f 100644 --- a/tests/cleo_tests/MemoryOperations/0A96.txt +++ b/tests/cleo_tests/MemoryOperations/0A96.txt @@ -1,5 +1,5 @@ {$CLEO .s} -{$INCLUDE_ONCE ../cleo_tester.txt} +{$INCLUDE_ONCE ../cleo_tester.inc} script_name "0A96" // get_ped_pointer test("0A96 (get_ped_pointer)", @tests) diff --git a/tests/cleo_tests/MemoryOperations/0A97.txt b/tests/cleo_tests/MemoryOperations/0A97.txt index 1717e8d6..e1472875 100644 --- a/tests/cleo_tests/MemoryOperations/0A97.txt +++ b/tests/cleo_tests/MemoryOperations/0A97.txt @@ -1,5 +1,5 @@ {$CLEO .s} -{$INCLUDE_ONCE ../cleo_tester.txt} +{$INCLUDE_ONCE ../cleo_tester.inc} script_name "0A97" // get_vehicle_pointer test("0A97 (get_vehicle_pointer)", @tests) diff --git a/tests/cleo_tests/MemoryOperations/0A98.txt b/tests/cleo_tests/MemoryOperations/0A98.txt index 60c2c264..26c6d9ae 100644 --- a/tests/cleo_tests/MemoryOperations/0A98.txt +++ b/tests/cleo_tests/MemoryOperations/0A98.txt @@ -1,5 +1,5 @@ {$CLEO .s} -{$INCLUDE_ONCE ../cleo_tester.txt} +{$INCLUDE_ONCE ../cleo_tester.inc} script_name "0A98" // get_vehicle_pointer test("0A98 (get_object_pointer)", @tests) diff --git a/tests/cleo_tests/MemoryOperations/0AA4.txt b/tests/cleo_tests/MemoryOperations/0AA4.txt index ac496cd2..5b9d9012 
100644 --- a/tests/cleo_tests/MemoryOperations/0AA4.txt +++ b/tests/cleo_tests/MemoryOperations/0AA4.txt @@ -1,5 +1,5 @@ {$CLEO .s} -{$INCLUDE_ONCE ../cleo_tester.txt} +{$INCLUDE_ONCE ../cleo_tester.inc} script_name "0AA4" test("0AA4 (get_dynamic_library_procedure)", @tests) diff --git a/tests/cleo_tests/MemoryOperations/0AC6.txt b/tests/cleo_tests/MemoryOperations/0AC6.txt index 8df82524..cf2585b7 100644 --- a/tests/cleo_tests/MemoryOperations/0AC6.txt +++ b/tests/cleo_tests/MemoryOperations/0AC6.txt @@ -1,5 +1,5 @@ {$CLEO .s} -{$INCLUDE_ONCE ../cleo_tester.txt} +{$INCLUDE_ONCE ../cleo_tester.inc} script_name "0AC6" // get_label_pointer test("0AC6 (get_label_pointer)", @tests) diff --git a/tests/cleo_tests/MemoryOperations/0AC7.txt b/tests/cleo_tests/MemoryOperations/0AC7.txt index 1974662f..45190382 100644 --- a/tests/cleo_tests/MemoryOperations/0AC7.txt +++ b/tests/cleo_tests/MemoryOperations/0AC7.txt @@ -1,5 +1,5 @@ {$CLEO .s} -{$INCLUDE_ONCE ../cleo_tester.txt} +{$INCLUDE_ONCE ../cleo_tester.inc} script_name "0AC7" // get_var_pointer test("0AC7 (get_var_pointer)", @tests) diff --git a/tests/cleo_tests/MemoryOperations/0AC8.txt b/tests/cleo_tests/MemoryOperations/0AC8.txt index 057244f4..6998e06c 100644 --- a/tests/cleo_tests/MemoryOperations/0AC8.txt +++ b/tests/cleo_tests/MemoryOperations/0AC8.txt @@ -1,5 +1,5 @@ {$CLEO .s} -{$INCLUDE_ONCE ../cleo_tester.txt} +{$INCLUDE_ONCE ../cleo_tester.inc} script_name "0AC8" // allocate_memory test("0AC8 (allocate_memory)", @tests) diff --git a/tests/cleo_tests/MemoryOperations/0AC9.txt b/tests/cleo_tests/MemoryOperations/0AC9.txt index 11c1eb33..aa822cb6 100644 --- a/tests/cleo_tests/MemoryOperations/0AC9.txt +++ b/tests/cleo_tests/MemoryOperations/0AC9.txt @@ -1,5 +1,5 @@ {$CLEO .s} -{$INCLUDE_ONCE ../cleo_tester.txt} +{$INCLUDE_ONCE ../cleo_tester.inc} script_name "0AC9" // free_memory test("0AC9 (free_memory)", @tests) diff --git a/tests/cleo_tests/MemoryOperations/0AE9.txt 
b/tests/cleo_tests/MemoryOperations/0AE9.txt index 6e76df78..9e73de1e 100644 --- a/tests/cleo_tests/MemoryOperations/0AE9.txt +++ b/tests/cleo_tests/MemoryOperations/0AE9.txt @@ -1,5 +1,5 @@ {$CLEO .s} -{$INCLUDE_ONCE ../cleo_tester.txt} +{$INCLUDE_ONCE ../cleo_tester.inc} script_name "0AE9" // pop_float test("0AE9 (pop_float)", @tests) diff --git a/tests/cleo_tests/MemoryOperations/0AEA.txt b/tests/cleo_tests/MemoryOperations/0AEA.txt index 555bb926..5b125a4c 100644 --- a/tests/cleo_tests/MemoryOperations/0AEA.txt +++ b/tests/cleo_tests/MemoryOperations/0AEA.txt @@ -1,5 +1,5 @@ {$CLEO .s} -{$INCLUDE_ONCE ../cleo_tester.txt} +{$INCLUDE_ONCE ../cleo_tester.inc} script_name "0AEA" // get_ped_ref test("0AEA (get_ped_ref)", @tests) diff --git a/tests/cleo_tests/MemoryOperations/0AEB.txt b/tests/cleo_tests/MemoryOperations/0AEB.txt index de66d9bd..f60d5929 100644 --- a/tests/cleo_tests/MemoryOperations/0AEB.txt +++ b/tests/cleo_tests/MemoryOperations/0AEB.txt @@ -1,5 +1,5 @@ {$CLEO .s} -{$INCLUDE_ONCE ../cleo_tester.txt} +{$INCLUDE_ONCE ../cleo_tester.inc} script_name "0AEB" // get_vehicle_ref test("0AEB (get_vehicle_ref)", @tests) diff --git a/tests/cleo_tests/MemoryOperations/0AEC.txt b/tests/cleo_tests/MemoryOperations/0AEC.txt index e97d7521..ae56071e 100644 --- a/tests/cleo_tests/MemoryOperations/0AEC.txt +++ b/tests/cleo_tests/MemoryOperations/0AEC.txt @@ -1,5 +1,5 @@ {$CLEO .s} -{$INCLUDE_ONCE ../cleo_tester.txt} +{$INCLUDE_ONCE ../cleo_tester.inc} script_name "0AEC" // get_object_ref test("0AEC (get_object_ref)", @tests) diff --git a/tests/cleo_tests/Text/0AD3.txt b/tests/cleo_tests/Text/0AD3.txt new file mode 100644 index 00000000..5c2879ea --- /dev/null +++ b/tests/cleo_tests/Text/0AD3.txt @@ -0,0 +1,72 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name '0AD3' +test("0AD3 (string_format)", @tests) +terminate_this_custom_script + + +function tests + it("should format string", @test1) + it("should respect short string variable size", @test2) + 
it("should respect long string variable size", @test3) + it("should create long text", @test4) + + return + + function test1 + string_format {buffer} 0@v {format} "" {args} + assert_eqs(0@v, "") + + string_format {buffer} 0@v {format} "test" {args} + assert_eqs(0@v, "test") + + string_format {buffer} 0@v {format} "char %c" {args} 0x41 + assert_eqs(0@v, "char A") + + string_format {buffer} 0@v {format} "int %d" {args} 3 + assert_eqs(0@v, "int 3") + + string_format {buffer} 0@v {format} "hex %x" {args} 0x123 + assert_eqs(0@v, "hex 123") + + string_format {buffer} 0@v {format} "num %0.1f" {args} 1.234 + assert_eqs(0@v, "num 1.2") + + string_format {buffer} 0@v {format} "str %s" {args} "text" + assert_eqs(0@v, "str text") + end + + function test2 + 0@ = 0xAAAAAAAA + 1@ = 0xBBBBBBBB + 2@ = 0xCCCCCCCC + + string_format {buffer} 0@s {format} "some longer test text" {args} + + assert_eqs(0@s, "some lon") // clamped to size + assert_eq(2@, 0xCCCCCCCC) + end + + function test3 + 0@ = 0xAAAAAAAA + 1@ = 0xBBBBBBBB + 2@ = 0xCCCCCCCC + 3@ = 0xDDDDDDDD + 4@ = 0xEEEEEEEE + + string_format {buffer} 0@v {format} "some longer test text" {args} + + assert_eqs(0@v, "some longer test") // clamped to size + assert_eq(4@, 0xEEEEEEEE) + end + + function test4 + 0@ = allocate_memory {size} 64 + + string_format {buffer} 0@ {format} "some longer test text" {args} + + assert_eqs(0@, "some longer test text") // not clamped + free_memory 0@ + end +end diff --git a/tests/cleo_tests/Text/0AD4.txt b/tests/cleo_tests/Text/0AD4.txt new file mode 100644 index 00000000..24decbc2 --- /dev/null +++ b/tests/cleo_tests/Text/0AD4.txt @@ -0,0 +1,75 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name '0AD4' +test("0AD4 (scan_string)", @tests) +terminate_this_custom_script + + +function tests + it("should scan numbers", @test1) + it("should scan characters", @test2) + it("should scan strings", @test3) + it("should report arg count missmatch", @test4) + it("should respect target string size", 
@test5) + return + + function test1 + scan_string {string} "input 1 2 4 8.0 16.0 32.0" {format} "input %d %d %d %f %f %f" {var_nValues} 0@ {var_values} 1@ 2@ 3@ 4@ 5@ 6@ + assert_result_true() + assert_eq(0@, 6) // read values count + assert_eq(1@, 1) + assert_eq(2@, 2) + assert_eq(3@, 4) + assert_eqf(4@, 8.0) + assert_eqf(5@, 16.0) + assert_eqf(6@, 32.0) + end + + function test2 + scan_string {string} "ABC" {format} "%c%c%c" {var_nValues} 0@ {var_values} 1@ 2@ 3@ + assert_result_true() + assert_eq(0@, 3) // read values count + assert_eq(1@, 0x41) // A + assert_eq(2@, 0x42) // B + assert_eq(3@, 0x43) // C + end + + function test3 + scan_string {string} "some testing text" {format} "%s %s %s" {var_nValues} 0@ {var_values} 1@s 3@s 5@s + assert_result_true() + assert_eq(0@, 3) // read values count + assert_eqs(1@s, "some") + assert_eqs(3@s, "testing") + assert_eqs(5@s, "text") + end + + function test4 + 3@ = 0xCCCCCCCC + scan_string {string} "input 1 2" {format} "input %d %d %d" {var_nValues} 0@ {var_values} 1@ 2@ 3@ + assert_result_false() + assert_eq(0@, 2) // read values count + assert_eq(1@, 1) + assert_eq(2@, 2) + assert_eq(3@, 0xCCCCCCCC) // unchanged + end + + function test5 + 1@ = 0x77777777 + 2@ = 0x88888888 + 3@ = 0x99999999 + 4@ = 0xAAAAAAAA + 5@ = 0xBBBBBBBB + 6@ = 0xCCCCCCCC + 7@ = 0xDDDDDDDD + 8@ = 0xEEEEEEEE + + scan_string {string} "first_very_long_test_text second_very_long_test_text" {format} "%s %s" {var_nValues} 0@ {var_values} 1@s 4@v, + assert_result_true() + assert_eq(0@, 2) // read values count + assert_eqs(1@s, "first_ve") // clamped to variable size + assert_eq(3@, 0x99999999) // unchanged + assert_eqs(4@v, "second_very_long") // clamped to variable size + assert_eq(8@, 0xEEEEEEEE) // unchanged + end +end diff --git a/tests/cleo_tests/Text/0ADB.txt b/tests/cleo_tests/Text/0ADB.txt new file mode 100644 index 00000000..5695cf41 --- /dev/null +++ b/tests/cleo_tests/Text/0ADB.txt @@ -0,0 +1,18 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + 
+script_name '0ADB' +test("0ADB (get_name_of_vehicle_model)", @tests) +terminate_this_custom_script + +function tests + it("should return vehicle model name", @test1) + + return + + function test1 + 0@v = get_name_of_vehicle_model {modelId} 400 + + assert_eqs(0@v, "LANDSTK") + end +end diff --git a/tests/cleo_tests/Text/0ADE.txt b/tests/cleo_tests/Text/0ADE.txt new file mode 100644 index 00000000..daf1529a --- /dev/null +++ b/tests/cleo_tests/Text/0ADE.txt @@ -0,0 +1,32 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name '0ADE' +test("0ADE (get_text_label_string)", @tests) +terminate_this_custom_script + +function tests + it("should return into variable", @test1) + it("should return source pointer", @test2) + it("should return empty", @test3) + return + + function test1 + 0@v = get_text_label_string {key} 'DEAD' + assert_eqs(0@v, "Wasted") + end + + function test2 + 0@ = get_text_label_string {key} 'DEAD' + assert_ptr(0@) + assert_eqs(0@, "Wasted") + end + + function test3 + 0@v = get_text_label_string {key} 'CL_INVA' // invalid label + assert_eqs(0@v, "") + + 0@ = get_text_label_string {key} 'CL_INVA' // invalid label + assert_eqs(0@, "") + end +end diff --git a/tests/cleo_tests/Text/0ADF.txt b/tests/cleo_tests/Text/0ADF.txt new file mode 100644 index 00000000..078fb612 --- /dev/null +++ b/tests/cleo_tests/Text/0ADF.txt @@ -0,0 +1,23 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name '0ADF' +test("0ADF (add_text_label)", @tests) +terminate_this_custom_script + +function tests + it("should add dynamic GXT", @test1) + return + + function test1 + // gxt entry not present yet + 0@v = get_text_label_string {key} 'CLE0ADF' + assert_eqs(0@v, "") + + add_text_label {dynamicKey} 'CLE0ADF' {text} "cleo test" + 0@v = get_text_label_string {key} 'CLE0ADF' + assert_eqs(0@v, "cleo test") + + remove_text_label {key} 'CLE0ADF' // cleanup + end +end diff --git a/tests/cleo_tests/Text/0AE0.txt b/tests/cleo_tests/Text/0AE0.txt new file mode 100644 index 
00000000..42abcead --- /dev/null +++ b/tests/cleo_tests/Text/0AE0.txt @@ -0,0 +1,25 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name '0AE0' +test("0AE0 (remove_text_label)", @tests) +terminate_this_custom_script + +function tests + it("should remove dynamic GXT", @test1) + return + + function test1 + // gxt entry not present yet + 0@v = get_text_label_string {key} 'CLE0AE0' + assert_eqs(0@v, "") + + add_text_label {dynamicKey} 'CLE0AE0' {text} "cleo test" + 0@v = get_text_label_string {key} 'CLE0AE0' + assert_eqs(0@v, "cleo test") + + remove_text_label {key} 'CLE0AE0' + 0@v = get_text_label_string {key} 'CLE0AE0' + assert_eqs(0@v, "") // successfully removed + end +end diff --git a/tests/cleo_tests/Text/0AED.txt b/tests/cleo_tests/Text/0AED.txt new file mode 100644 index 00000000..bb5ae2a9 --- /dev/null +++ b/tests/cleo_tests/Text/0AED.txt @@ -0,0 +1,17 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name '0AED' +test("0AED (string_float_format)", @tests) +terminate_this_custom_script + + +function tests + it("should print float", @test1) + return + + function test1 + 0@v = string_float_format {number} 1.66666 {format} "Float %0.3f" + assert_eqs(0@v, "Float 1.667") + end +end diff --git a/tests/cleo_tests/Text/2600.txt b/tests/cleo_tests/Text/2600.txt new file mode 100644 index 00000000..a68c6f0c --- /dev/null +++ b/tests/cleo_tests/Text/2600.txt @@ -0,0 +1,54 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name '2600' +test("2600 (is_text_empty)", @tests) +terminate_this_custom_script + + +function tests + it("short string should be empty", @test1) + it("short string should NOT be empty", @test2) + it("long string should be empty", @test3) + it("long string should NOT be empty", @test4) + it("buffer string should be empty", @test5) + it("buffer string should NOT be empty", @test6) + return + + function test1 + 1@s = '' + is_text_empty 1@s + assert_result_true() + end + + function test2 + 1@s = 'test' + is_text_empty 1@s + 
assert_result_false() + end + + function test3 + 1@v = "" + is_text_empty 1@v + assert_result_true() + end + + function test4 + 1@v = "test" + is_text_empty 1@s + assert_result_false() + end + + function test5 + 1@ = allocate_memory {size} 64 // 0 prefill in CLEO5 + is_text_empty 1@ + assert_result_true() + end + + function test6 + 1@ = allocate_memory {size} 64 + string_format {buffer} 1@ {format} "some text" + is_text_empty 1@ + assert_result_false() + end +end diff --git a/tests/cleo_tests/Text/2601.txt b/tests/cleo_tests/Text/2601.txt new file mode 100644 index 00000000..990e00ea --- /dev/null +++ b/tests/cleo_tests/Text/2601.txt @@ -0,0 +1,92 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name '2601' +test("2601 (is_text_equal)", @tests) +terminate_this_custom_script + + +function tests + before_each(@prepare_tests) + after_each(@cleanup_tests) + + it("should texts be equal", @test1) + it("should texts be NOT equal", @test2) + return + + :prepare_tests + 0@s = 'text-A' + 2@s = 'text-B' + 4@v = "text-A" + 8@v = "text-B" + 12@ = allocate_memory {size} 64 + string_format {buffer} 12@ {format} "text-A" + 13@ = allocate_memory {size} 64 + string_format {buffer} 13@ {format} "text-B" + 14@ = allocate_memory {size} 64 + string_format {buffer} 14@ {format} "TEXT-A" + 15@ = allocate_memory {size} 64 + string_format {buffer} 15@ {format} "tExT-b" + return + + :cleanup_tests + free_memory {address} 12@ + free_memory {address} 13@ + free_memory {address} 14@ + free_memory {address} 15@ + return + + function test1 + is_text_equal {text} 0@s {another} 0@s {ignoreCase} false + assert_result_true() + + is_text_equal {text} 0@s {another} 0@s {ignoreCase} true + assert_result_true() + + is_text_equal {text} 0@s {another} 4@v {ignoreCase} false + assert_result_true() + + is_text_equal {text} 0@s {another} 4@v {ignoreCase} true + assert_result_true() + + is_text_equal {text} 0@s {another} 12@ {ignoreCase} false + assert_result_true() + + is_text_equal {text} 0@s 
{another} 12@ {ignoreCase} true + assert_result_true() + + // case mismatch + is_text_equal {text} 0@s {another} 14@ {ignoreCase} true + assert_result_true() + + is_text_equal {text} 8@v {another} 15@ {ignoreCase} true + assert_result_true() + end + + function test2 + is_text_equal {text} 0@s {another} 2@s {ignoreCase} false + assert_result_false() + + is_text_equal {text} 0@s {another} 2@s {ignoreCase} true + assert_result_false() + + is_text_equal {text} 4@v {another} 8@v {ignoreCase} false + assert_result_false() + + is_text_equal {text} 4@v {another} 8@v {ignoreCase} true + assert_result_false() + + is_text_equal {text} 4@v {another} 13@ {ignoreCase} false + assert_result_false() + + is_text_equal {text} 4@v {another} 13@ {ignoreCase} true + assert_result_false() + + // case mismatch + is_text_equal {text} 0@s {another} 14@ {ignoreCase} false + assert_result_false() + + is_text_equal {text} 8@v {another} 15@ {ignoreCase} false + assert_result_false() + end +end diff --git a/tests/cleo_tests/Text/2602.txt b/tests/cleo_tests/Text/2602.txt new file mode 100644 index 00000000..f7640c50 --- /dev/null +++ b/tests/cleo_tests/Text/2602.txt @@ -0,0 +1,79 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name '2602' +test("2602 (is_text_in_text)", @tests) +terminate_this_custom_script + + +function tests + it("should contain sub-text", @test1) + it("should NOT contain sub-text", @test2) + return + + function test1 + is_text_in_text {text} "the_longer test_text" {subText} "the_longer test_text" {ignoreCase} false + assert_result_true() + + is_text_in_text {text} "the_longer test_text" {subText} "the_longer test_text" {ignoreCase} true + assert_result_true() + + is_text_in_text {text} "the_longer test_text" {subText} "long" {ignoreCase} false + assert_result_true() + + is_text_in_text {text} "the_longer test_text" {subText} "long" {ignoreCase} true + assert_result_true() + + is_text_in_text {text} "the_longer test_text" {subText} "_" {ignoreCase} false + 
assert_result_true() + + is_text_in_text {text} "the_longer test_text" {subText} "_" {ignoreCase} true + assert_result_true() + + is_text_in_text {text} "the_longer test_text" {subText} "" {ignoreCase} false + assert_result_true() + + is_text_in_text {text} "the_longer test_text" {subText} "" {ignoreCase} true + assert_result_true() + + is_text_in_text {text} "" {subText} "" {ignoreCase} false + assert_result_true() + + is_text_in_text {text} "" {subText} "" {ignoreCase} true + assert_result_true() + + // case mismatch + is_text_in_text {text} "the_longer test_text" {subText} "THE" {ignoreCase} true + assert_result_true() + + is_text_in_text {text} "the_longer test_text" {subText} "LonGer" {ignoreCase} true + assert_result_true() + end + + function test2 + is_text_in_text {text} "long" {subText} "the_longer test_text" {ignoreCase} false + assert_result_false() + + is_text_in_text {text} "long" {subText} "the_longer test_text" {ignoreCase} true + assert_result_false() + + is_text_in_text {text} "the_longer test_text" {subText} "other" {ignoreCase} false + assert_result_false() + + is_text_in_text {text} "the_longer test_text" {subText} "other" {ignoreCase} true + assert_result_false() + + is_text_in_text {text} "" {subText} "other" {ignoreCase} false + assert_result_false() + + is_text_in_text {text} "" {subText} "other" {ignoreCase} true + assert_result_false() + + // case mismatch + is_text_in_text {text} "the_longer test_text" {subText} "THE" {ignoreCase} false + assert_result_false() + + is_text_in_text {text} "the_longer test_text" {subText} "LonGer" {ignoreCase} false + assert_result_false() + end +end diff --git a/tests/cleo_tests/Text/2603.txt b/tests/cleo_tests/Text/2603.txt new file mode 100644 index 00000000..0c7ee4b8 --- /dev/null +++ b/tests/cleo_tests/Text/2603.txt @@ -0,0 +1,79 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name '2603' +test("2603 (is_text_prefix)", @tests) +terminate_this_custom_script + + +function tests + it("should 
contain text prefix", @test1) + it("should NOT contain text prefix", @test2) + return + + function test1 + is_text_prefix {text} "the_longer test_text" {prefix} "" {ignoreCase} false + assert_result_true() + + is_text_prefix {text} "the_longer test_text" {prefix} "" {ignoreCase} true + assert_result_true() + + is_text_prefix {text} "the_longer test_text" {prefix} "t" {ignoreCase} false + assert_result_true() + + is_text_prefix {text} "the_longer test_text" {prefix} "t" {ignoreCase} true + assert_result_true() + + is_text_prefix {text} "the_longer test_text" {prefix} "the" {ignoreCase} false + assert_result_true() + + is_text_prefix {text} "the_longer test_text" {prefix} "the" {ignoreCase} true + assert_result_true() + + is_text_prefix {text} "the_longer test_text" {prefix} "the_longer " {ignoreCase} false + assert_result_true() + + is_text_prefix {text} "the_longer test_text" {prefix} "the_longer " {ignoreCase} true + assert_result_true() + + is_text_prefix {text} "the_longer test_text" {prefix} "the_longer test_text" {ignoreCase} false + assert_result_true() + + is_text_prefix {text} "the_longer test_text" {prefix} "the_longer test_text" {ignoreCase} true + assert_result_true() + + // case mismatch + is_text_prefix {text} "the_longer test_text" {prefix} "THE" {ignoreCase} true + assert_result_true() + + is_text_prefix {text} "the_longer test_text" {prefix} "tHe_LoNgEr" {ignoreCase} true + assert_result_true() + end + + function test2 + is_text_prefix {text} "the_longer test_text" {prefix} "longer" {ignoreCase} false + assert_result_false() + + is_text_prefix {text} "the_longer test_text" {prefix} "longer" {ignoreCase} true + assert_result_false() + + is_text_prefix {text} "the_longer test_text" {prefix} "he" {ignoreCase} false + assert_result_false() + + is_text_prefix {text} "the_longer test_text" {prefix} "he" {ignoreCase} true + assert_result_false() + + is_text_prefix {text} "the" {prefix} "the_longer test_text" {ignoreCase} false + assert_result_false() + + 
is_text_prefix {text} "the" {prefix} "the_longer test_text" {ignoreCase} true + assert_result_false() + + // case mismatch + is_text_prefix {text} "the_longer test_text" {prefix} "THE" {ignoreCase} false + assert_result_false() + + is_text_prefix {text} "the_longer test_text" {prefix} "tHe_LoNgEr" {ignoreCase} false + assert_result_false() + end +end diff --git a/tests/cleo_tests/Text/2604.txt b/tests/cleo_tests/Text/2604.txt new file mode 100644 index 00000000..b82477fc --- /dev/null +++ b/tests/cleo_tests/Text/2604.txt @@ -0,0 +1,73 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name '2604' +test("2604 (is_text_suffix)", @tests) +terminate_this_custom_script + + +function tests + it("should contain text suffix", @test1) + it("should NOT contain text suffix", @test2) + return + + function test1 + is_text_suffix {text} "the_longer test_text" {suffix} "" {ignoreCase} false + assert_result_true() + + is_text_suffix {text} "the_longer test_text" {suffix} "" {ignoreCase} false + assert_result_true() + + is_text_suffix {text} "the_longer test_text" {suffix} "t" {ignoreCase} false + assert_result_true() + + is_text_suffix {text} "the_longer test_text" {suffix} "t" {ignoreCase} true + assert_result_true() + + is_text_suffix {text} "the_longer test_text" {suffix} "text" {ignoreCase} false + assert_result_true() + + is_text_suffix {text} "the_longer test_text" {suffix} "text" {ignoreCase} true + assert_result_true() + + is_text_suffix {text} "the_longer test_text" {suffix} " test_text" {ignoreCase} false + assert_result_true() + + is_text_suffix {text} "the_longer test_text" {suffix} " test_text" {ignoreCase} true + assert_result_true() + + is_text_suffix {text} "the_longer test_text" {suffix} "the_longer test_text" {ignoreCase} false + assert_result_true() + + is_text_suffix {text} "the_longer test_text" {suffix} "the_longer test_text" {ignoreCase} true + assert_result_true() + + // case mismatch + is_text_suffix {text} "the_longer test_text" {suffix} "EXT" 
{ignoreCase} true + assert_result_true() + + is_text_suffix {text} "the_longer test_text" {suffix} "tEsT_TeXt" {ignoreCase} true + assert_result_true() + end + + function test2 + is_text_suffix {text} "the_longer test_text" {suffix} "tex" {ignoreCase} false + assert_result_false() + + is_text_suffix {text} "the_longer test_text" {suffix} "tex" {ignoreCase} true + assert_result_false() + + is_text_suffix {text} "text" {suffix} "the_longer test_text" {ignoreCase} false + assert_result_false() + + is_text_suffix {text} "text" {suffix} "the_longer test_text" {ignoreCase} true + assert_result_false() + + // case mismatch + is_text_suffix {text} "the_longer test_text" {suffix} "EXT" {ignoreCase} false + assert_result_false() + + is_text_suffix {text} "the_longer test_text" {suffix} "tEsT_TeXt" {ignoreCase} false + assert_result_false() + end +end diff --git a/tests/cleo_tests/cleo_tester.txt b/tests/cleo_tests/cleo_tester.inc similarity index 54% rename from tests/cleo_tests/cleo_tester.txt rename to tests/cleo_tests/cleo_tester.inc index 27a8fd9b..382ff5bb 100644 --- a/tests/cleo_tests/cleo_tester.txt +++ b/tests/cleo_tests/cleo_tester.inc @@ -27,7 +27,7 @@ function test(suite_name: integer, callback: int) int suite_name_buf = get_label_pointer @_cleo_tester_test_name copy_memory {src} suite_name {dest} suite_name_buf {size} 255 // used in an it trace - trace "Testing %s" suite_name + trace "~w~Testing %s" suite_name _cleo_tester_write_var(VAR_BEFORE_EACH, @_cleo_tester_stub) _cleo_tester_write_var(VAR_AFTER_EACH, @_cleo_tester_stub) @@ -54,7 +54,7 @@ function it(spec_name: integer, callback: int) run_spec - trace "~g~~h~~h~Test #%d PASSED" index + //trace "~g~~h~~h~Test #%d PASSED" index index++ _cleo_tester_write_var(VAR_TEST_INDEX, index) @@ -101,12 +101,12 @@ hex 00(256) end -:_cleo_tester_fail -int test_index = _cleo_tester_read_var(VAR_TEST_INDEX) -int test_name = get_label_pointer @_cleo_tester_spec_name -int assert_index = _cleo_tester_read_var(VAR_ASSERT_INDEX) 
-breakpoint "~r~~h~~h~~h~Test #%d Assert #%d FAILED! %d Expected, %d Actual" test_index assert_index {val1} 0@ {val2} 1@ -terminate_this_custom_script +function _cleo_tester_fail + int test_index = _cleo_tester_read_var(VAR_TEST_INDEX) + int test_name = get_label_pointer @_cleo_tester_spec_name + int assert_index = _cleo_tester_read_var(VAR_ASSERT_INDEX) + trace "~r~~h~~h~~h~Test #%d Assert #%d FAILED!" test_index assert_index +end function _cleo_tester_increment_assert int index = _cleo_tester_read_var(VAR_ASSERT_INDEX) @@ -114,46 +114,160 @@ function _cleo_tester_increment_assert _cleo_tester_write_var(VAR_ASSERT_INDEX, index) end +/// checks if bool value is true (different than 0) +function assert_true(flag: int) + _cleo_tester_increment_assert + if + flag == false + then + _cleo_tester_fail + trace "TRUE expected~n~%d occured" flag + breakpoint + terminate_this_custom_script + end +end + +/// checks if bool value is false +function assert_false(flag: int) + _cleo_tester_increment_assert + if + flag <> false + then + _cleo_tester_fail + trace "FALSE expected~n~%d occured" flag + breakpoint + terminate_this_custom_script + end +end + +/// checks if condition result value is true +:assert_result_true + goto_if_false @_assert_result_true + _cleo_tester_increment_assert + return + + :_assert_result_true + _cleo_tester_increment_assert + _cleo_tester_fail + trace "Condition result is FALSE, expected TRUE" + breakpoint + terminate_this_custom_script +return + +/// checks if condition result value is false +:assert_result_false + goto_if_false @_assert_result_false + + _cleo_tester_increment_assert + _cleo_tester_fail + trace "Condition result is TRUE, expected FALSE" + breakpoint + terminate_this_custom_script + + :_assert_result_false + _cleo_tester_increment_assert +return + /// checks if two int values are equal, otherwise stops the test execution -function assert_eq(val1: int, val2: int) +function assert_eq(actual: int, expected: int) 
_cleo_tester_increment_assert - val1 == val2 - jf @_cleo_tester_fail + if + actual <> expected + then + _cleo_tester_fail + trace "%08X expected~n~%08X occured" expected actual + breakpoint + terminate_this_custom_script + end end /// checks if two int values are not equal, otherwise stops the test execution -function assert_neq(val1: int, val2: int) - _cleo_tester_increment_assert - val1 <> val2 - jf @_cleo_tester_fail +function assert_neq(actual: int, expected: int) + if + actual == expected + then + _cleo_tester_fail + trace "Expected value different than %08X" actual + breakpoint + terminate_this_custom_script + end end /// checks if two float values are equal, otherwise stops the test execution -function assert_eqf(val1:float, val2:float) +function assert_eqf(actual:float, expected:float) _cleo_tester_increment_assert - val1 == val2 - jf @_cleo_tester_fail + if + actual <> expected + then + _cleo_tester_fail + trace "%f expected~n~%f occured" expected actual + breakpoint + terminate_this_custom_script + end end /// checks if two float values are not equal, otherwise stops the test execution -function assert_neqf(val1:float, val2:float) +function assert_neqf(actual:float, expected:float) _cleo_tester_increment_assert - val1 <> val2 - jf @_cleo_tester_fail + if + actual == expected + then + _cleo_tester_fail + trace "Expected value different than %f" actual + breakpoint + terminate_this_custom_script + end end /// checks if value is a valid pointer, otherwise stops the test execution function assert_ptr(ptr: int) _cleo_tester_increment_assert - ptr > 0x10000 // possibly valid pointer - jf @_cleo_tester_fail + if + ptr <= 0x10000 // possibly valid pointer + then + _cleo_tester_fail + trace "%08X is not valid pointer" ptr + breakpoint + terminate_this_custom_script + end end /// checks if value is not 0, otherwise stops the test execution function assert(flag: int) _cleo_tester_increment_assert - flag <> False - jf @_cleo_tester_fail + if + flag == 0 + then + 
_cleo_tester_fail + breakpoint + terminate_this_custom_script + end +end + +/// checks if two string values are equal, otherwise stops the test execution +function assert_eqs(actual:string, expected:string) + _cleo_tester_increment_assert + if + not is_text_equal {text} actual {another} expected {ignoreCase} false + then + _cleo_tester_fail + trace "`%s` expected~n~`%s` occured" expected actual + breakpoint + terminate_this_custom_script + end +end + +/// checks if two string values are not equal, otherwise stops the test execution +function assert_neqs(actual:string, expected:string) + _cleo_tester_increment_assert + if + is_text_equal {text} actual {another} expected {ignoreCase} false + then + _cleo_tester_fail + trace "Expected value different than `%s`" actual + breakpoint + terminate_this_custom_script + end end /// registers a callback that runs before each unit test (test setup) From ef11b984d2507945c9a7eac519ff1368a5719c98 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Thu, 21 Mar 2024 23:39:34 +0100 Subject: [PATCH 132/216] Fix of CFileMgr.cpp path (#108) --- cleo_plugins/Text/Text.vcxproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cleo_plugins/Text/Text.vcxproj b/cleo_plugins/Text/Text.vcxproj index 785163b2..2069d0f6 100644 --- a/cleo_plugins/Text/Text.vcxproj +++ b/cleo_plugins/Text/Text.vcxproj @@ -122,7 +122,7 @@ xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" - + From 61d8d12b5751668de33c10051396ceab83d45406 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Fri, 22 Mar 2024 12:04:25 +0100 Subject: [PATCH 133/216] Documentation updates (#109) --- CHANGELOG.md | 6 ++++-- cleo_plugins/Audio/Audio.cpp | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 386e4714..e4c68353 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -54,7 +54,7 @@ - new opcode **2601 ([is_text_equal](https://library.sannybuilder.com/#/sa/text/2601))** - new opcode **2602 
([is_text_in_text](https://library.sannybuilder.com/#/sa/text/2602))** - new opcode **2603 ([is_text_prefix](https://library.sannybuilder.com/#/sa/text/2603))** - - new opcode **2604 ([is_text_suffix](https://library.sannybuilder.com/#/sa/text/2604))** + - new opcode **2604 ([is_text_suffix](https://library.sannybuilder.com/#/sa/text/2604))** - new and updated opcodes - implemented support for **memory pointer string** arguments for all game's native opcodes - **0B1E ([sign_extend](https://library.sannybuilder.com/#/sa/bitwise/0B1E))** @@ -65,6 +65,8 @@ - 'argument count' parameter of **0AB2 (cleo_return)** is now optional. `cleo_return 0` can be written as `cleo_return` - **cleo_return_\*** opcodes now can pass strings as return arguments - SCM functions **(0AB1)** now keep their own GOSUB's call stack + - fixed bug in **0AD4 ([scan_string](https://library.sannybuilder.com/#/sa/text/2604))** causing data overruns when reading strins longer than target variable + - fixed result register not being cleared before function call in opcodes **0AA7** and **0AA8** - changes in file operations - file paths can now use 'virtual absolute paths'. Use prefix in file path strings to access predefined locations: - `root:\` for _game root_ directory @@ -72,7 +74,7 @@ - `.\` for _this script file_ directory - `cleo:\` for _CLEO_ directory - `modules:\` for _CLEO\cleo_modules_ directory - - rewritten opcode **0A99 (set_current_directory)**. It no longer affects internal game state and other scripts + - rewritten opcode **0A99 (set_current_directory)**. 
Now it no longer affects internal game state or current directory in other scripts - improved error handling - more detailed error messages in multiple scenarios - some errors now cause the script to pause, instead of crashing the game diff --git a/cleo_plugins/Audio/Audio.cpp b/cleo_plugins/Audio/Audio.cpp index 323f6679..0c8060ec 100644 --- a/cleo_plugins/Audio/Audio.cpp +++ b/cleo_plugins/Audio/Audio.cpp @@ -401,7 +401,7 @@ class Audio return OR_CONTINUE; } - //250A=2,set_audio_stream_type %1d% + //250A=2,set_audio_stream_type %1d% type %2d% static OpcodeResult __stdcall opcode_250A(CScriptThread* thread) { auto stream = (CAudioStream*)OPCODE_READ_PARAM_UINT(); VALIDATE_STREAM(); From 52b651301c7303c337fa931fef579baf0740734f Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sat, 23 Mar 2024 15:31:38 +0100 Subject: [PATCH 134/216] Updates in load_dynamic_library opcode. (#110) --- .../MemoryOperations/MemoryOperations.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/cleo_plugins/MemoryOperations/MemoryOperations.cpp b/cleo_plugins/MemoryOperations/MemoryOperations.cpp index 648cd134..c4eb1b42 100644 --- a/cleo_plugins/MemoryOperations/MemoryOperations.cpp +++ b/cleo_plugins/MemoryOperations/MemoryOperations.cpp @@ -363,18 +363,21 @@ class MemoryOperations static OpcodeResult __stdcall opcode_0AA2(CLEO::CRunningScript* thread) { OPCODE_READ_PARAM_STRING(path); + + HMODULE ptr = nullptr; - // get absolute path - // in case of just filename let LoadLibrary resolve it itself + // resolve absolute path and try load char buff[MAX_PATH]; - if (std::filesystem::path(path).has_parent_path()) + strncpy(buff, path, sizeof(buff)); + CLEO_ResolvePath(thread, buff, sizeof(buff)); + ptr = LoadLibrary(buff); + + // in case of just filename let LoadLibrary resolve it itself + if (ptr == nullptr && !std::filesystem::path(path).has_parent_path()) { - strncpy(buff, path, sizeof(buff)); - CLEO_ResolvePath(thread, buff, sizeof(buff)); - path = buff; 
+ ptr = LoadLibrary(path); } - auto ptr = LoadLibrary(path); if (ptr != nullptr) { m_libraries.insert(ptr); From f363d5fce345b4a21f229f00f594f76984250940 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Tue, 26 Mar 2024 17:19:44 +0100 Subject: [PATCH 135/216] Removed duplicated address checks from memory read/write opcodes. (#112) --- cleo_plugins/MemoryOperations/MemoryOperations.cpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/cleo_plugins/MemoryOperations/MemoryOperations.cpp b/cleo_plugins/MemoryOperations/MemoryOperations.cpp index c4eb1b42..03335792 100644 --- a/cleo_plugins/MemoryOperations/MemoryOperations.cpp +++ b/cleo_plugins/MemoryOperations/MemoryOperations.cpp @@ -217,12 +217,6 @@ class MemoryOperations auto virtualProtect = OPCODE_READ_PARAM_BOOL(); // validate params - if ((size_t)address <= MinValidAddress) - { - SHOW_ERROR("Invalid '0x%X' pointer param in script %s\nScript suspended.", address, ScriptInfoStr(thread).c_str()); - return thread->Suspend(); - } - if (size < 0) { SHOW_ERROR("Invalid '%d' size argument in script %s\nScript suspended.", size, ScriptInfoStr(thread).c_str()); @@ -263,12 +257,6 @@ class MemoryOperations auto virtualProtect = OPCODE_READ_PARAM_BOOL(); // validate params - if ((size_t)address <= MinValidAddress) - { - SHOW_ERROR("Invalid '0x%X' pointer param of in script %s\nScript suspended.", address, ScriptInfoStr(thread).c_str()); - return thread->Suspend(); - } - if (size < 0 || size > sizeof(SCRIPT_VAR)) { SHOW_ERROR("Invalid '%d' size argument in script %s\nScript suspended.", size, ScriptInfoStr(thread).c_str()); From 7bf06d1d6eac111668ae96838786924bbd57309f Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Thu, 28 Mar 2024 01:59:12 +0100 Subject: [PATCH 136/216] Minor fixes. 
(#113) --- CHANGELOG.md | 2 +- cleo_plugins/FileSystemOperations/FileSystemOperations.cpp | 2 +- cleo_sdk/CLEO_Utils.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e4c68353..fe63f99d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -65,7 +65,7 @@ - 'argument count' parameter of **0AB2 (cleo_return)** is now optional. `cleo_return 0` can be written as `cleo_return` - **cleo_return_\*** opcodes now can pass strings as return arguments - SCM functions **(0AB1)** now keep their own GOSUB's call stack - - fixed bug in **0AD4 ([scan_string](https://library.sannybuilder.com/#/sa/text/2604))** causing data overruns when reading strins longer than target variable + - fixed bug in **0AD4 ([scan_string](https://library.sannybuilder.com/#/sa/text/2604))** causing data overruns when reading strings longer than target variable - fixed result register not being cleared before function call in opcodes **0AA7** and **0AA8** - changes in file operations - file paths can now use 'virtual absolute paths'. 
Use prefix in file path strings to access predefined locations: diff --git a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp index dd3857ab..825465e2 100644 --- a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp @@ -687,7 +687,7 @@ class FileSystemOperations return OR_CONTINUE; } - //2302=3,write_block_to_file %1d% size %2d% address %3d% // IF and SET + //2302=3, write_block_to_file %1d% size %2d% address %3d% // IF and SET static OpcodeResult WINAPI opcode_2302(CRunningScript* thread) { auto handle = READ_FILE_HANDLE_PARAM(); diff --git a/cleo_sdk/CLEO_Utils.h b/cleo_sdk/CLEO_Utils.h index b48efeb2..e69e066e 100644 --- a/cleo_sdk/CLEO_Utils.h +++ b/cleo_sdk/CLEO_Utils.h @@ -398,7 +398,7 @@ namespace CLEO if ((size_t)target.data <= MinValidAddress) { - SHOW_ERROR("Invalid '0x%X' target pointer of output string argument in script %s \nScript suspended.", str, ScriptInfoStr(thread).c_str()); + SHOW_ERROR("Invalid '0x%X' target pointer of output string argument in script %s \nScript suspended.", target.data, ScriptInfoStr(thread).c_str()); thread->Suspend(); return false; } From 398871a45695abe9827f857b6a16c2e44394e0d8 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Thu, 28 Mar 2024 02:16:52 +0100 Subject: [PATCH 137/216] Fixed style param when printing big formatted texts. 
(#114) --- cleo_plugins/Text/Text.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cleo_plugins/Text/Text.cpp b/cleo_plugins/Text/Text.cpp index ea8ace2f..25d51d6c 100644 --- a/cleo_plugins/Text/Text.cpp +++ b/cleo_plugins/Text/Text.cpp @@ -115,7 +115,7 @@ class Text auto styleIdx = std::clamp(style, 0, (int)MsgBigStyleCount - 1); strncpy(msgBuffBig[styleIdx], text, sizeof(msgBuffBig[styleIdx])); - CMessages::AddBigMessage(msgBuffBig[styleIdx], time, style); + CMessages::AddBigMessage(msgBuffBig[styleIdx], time, style - 1); return OR_CONTINUE; } @@ -160,7 +160,7 @@ class Text auto styleIdx = std::clamp(style, 0, (int)MsgBigStyleCount - 1); strncpy(msgBuffBig[styleIdx], text, sizeof(msgBuffBig[styleIdx])); - CMessages::AddBigMessage(msgBuffBig[styleIdx], time, style); + CMessages::AddBigMessage(msgBuffBig[styleIdx], time, style - 1); return OR_CONTINUE; } From 66f90f854f0ce320eedf414dd69d423e0d1d3b73 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Fri, 29 Mar 2024 17:37:00 +0100 Subject: [PATCH 138/216] New opcode display_text_formatted (#115) * New opcode display_text_formated * Changelog updated. * Solution fixes. 
--- CHANGELOG.md | 1 + cleo_plugins/Text/Text.cpp | 66 ++++++++++++++++++-------- cleo_plugins/Text/Text.vcxproj | 2 + cleo_plugins/Text/Text.vcxproj.filters | 8 +++- 4 files changed, 56 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fe63f99d..57a994be 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,6 +55,7 @@ - new opcode **2602 ([is_text_in_text](https://library.sannybuilder.com/#/sa/text/2602))** - new opcode **2603 ([is_text_prefix](https://library.sannybuilder.com/#/sa/text/2603))** - new opcode **2604 ([is_text_suffix](https://library.sannybuilder.com/#/sa/text/2604))** + - new opcode **2605 ([display_text_formatted](https://library.sannybuilder.com/#/sa/text/2605))** - new and updated opcodes - implemented support for **memory pointer string** arguments for all game's native opcodes - **0B1E ([sign_extend](https://library.sannybuilder.com/#/sa/bitwise/0B1E))** diff --git a/cleo_plugins/Text/Text.cpp b/cleo_plugins/Text/Text.cpp index 25d51d6c..8614efe4 100644 --- a/cleo_plugins/Text/Text.cpp +++ b/cleo_plugins/Text/Text.cpp @@ -7,6 +7,7 @@ #include "CModelInfo.h" #include "CText.h" #include "CTextManager.h" +#include "CTheScripts.h" #include using namespace CLEO; @@ -57,6 +58,7 @@ class Text CLEO_RegisterOpcode(0x2602, opcode_2602); // is_text_in_text CLEO_RegisterOpcode(0x2603, opcode_2603); // is_text_prefix CLEO_RegisterOpcode(0x2604, opcode_2604); // is_text_sufix + CLEO_RegisterOpcode(0x2605, opcode_2605); // display_text_formatted // register event callbacks CLEO_RegisterCallback(eCallbackId::GameBegin, OnGameBegin); @@ -98,7 +100,7 @@ class Text } //0ACA=1,show_text_box %1d% - static OpcodeResult __stdcall opcode_0ACA(CRunningScript* thread) + static OpcodeResult __stdcall opcode_0ACA(CLEO::CRunningScript* thread) { OPCODE_READ_PARAM_STRING(text); @@ -107,7 +109,7 @@ class Text } //0ACB=3,show_styled_text %1d% time %2d% style %3d% - static OpcodeResult __stdcall opcode_0ACB(CRunningScript* thread) + static 
OpcodeResult __stdcall opcode_0ACB(CLEO::CRunningScript* thread) { OPCODE_READ_PARAM_STRING(text); auto time = OPCODE_READ_PARAM_INT(); @@ -120,7 +122,7 @@ class Text } //0ACC=2,show_text_lowpriority %1d% time %2d% - static OpcodeResult __stdcall opcode_0ACC(CRunningScript* thread) + static OpcodeResult __stdcall opcode_0ACC(CLEO::CRunningScript* thread) { OPCODE_READ_PARAM_STRING(text); auto time = OPCODE_READ_PARAM_INT(); @@ -131,7 +133,7 @@ class Text } //0ACD=2,show_text_highpriority %1d% time %2d% - static OpcodeResult __stdcall opcode_0ACD(CRunningScript* thread) + static OpcodeResult __stdcall opcode_0ACD(CLEO::CRunningScript* thread) { OPCODE_READ_PARAM_STRING(text); auto time = OPCODE_READ_PARAM_INT(); @@ -142,7 +144,7 @@ class Text } //0ACE=-1,show_formatted_text_box %1d% - static OpcodeResult __stdcall opcode_0ACE(CRunningScript* thread) + static OpcodeResult __stdcall opcode_0ACE(CLEO::CRunningScript* thread) { OPCODE_READ_PARAM_STRING_FORMATTED(text); @@ -151,7 +153,7 @@ class Text } //0ACF=-1,show_formatted_styled_text %1d% time %2d% style %3d% - static OpcodeResult __stdcall opcode_0ACF(CRunningScript* thread) + static OpcodeResult __stdcall opcode_0ACF(CLEO::CRunningScript* thread) { OPCODE_READ_PARAM_STRING(format); auto time = OPCODE_READ_PARAM_INT(); @@ -165,7 +167,7 @@ class Text } //0AD0=-1,show_formatted_text_lowpriority %1d% time %2d% - static OpcodeResult __stdcall opcode_0AD0(CRunningScript* thread) + static OpcodeResult __stdcall opcode_0AD0(CLEO::CRunningScript* thread) { OPCODE_READ_PARAM_STRING(format); auto time = OPCODE_READ_PARAM_INT(); @@ -177,7 +179,7 @@ class Text } //0AD1=-1,show_formatted_text_highpriority %1d% time %2d% - static OpcodeResult __stdcall opcode_0AD1(CRunningScript* thread) + static OpcodeResult __stdcall opcode_0AD1(CLEO::CRunningScript* thread) { OPCODE_READ_PARAM_STRING(format); auto time = OPCODE_READ_PARAM_INT(); @@ -189,7 +191,7 @@ class Text } //0AD3=-1,string %1d% format %2d% ... 
- static OpcodeResult __stdcall opcode_0AD3(CRunningScript* thread) + static OpcodeResult __stdcall opcode_0AD3(CLEO::CRunningScript* thread) { auto result = OPCODE_READ_PARAM_OUTPUT_VAR_STRING(); OPCODE_READ_PARAM_STRING_FORMATTED(text); @@ -199,7 +201,7 @@ class Text } //0AD4=-1,%3d% = scan_string %1d% format %2d% //IF and SET - static OpcodeResult __stdcall opcode_0AD4(CRunningScript* thread) + static OpcodeResult __stdcall opcode_0AD4(CLEO::CRunningScript* thread) { OPCODE_READ_PARAM_STRING(src); OPCODE_READ_PARAM_STRING(format); @@ -271,7 +273,7 @@ class Text } //0ADB=2,%2d% = car_model %1d% name - static OpcodeResult __stdcall opcode_0ADB(CRunningScript* thread) + static OpcodeResult __stdcall opcode_0ADB(CLEO::CRunningScript* thread) { auto modelIndex = OPCODE_READ_PARAM_UINT(); @@ -289,7 +291,7 @@ class Text } //0ADE=2,%2d% = text_by_GXT_entry %1d% - static OpcodeResult __stdcall opcode_0ADE(CRunningScript* thread) + static OpcodeResult __stdcall opcode_0ADE(CLEO::CRunningScript* thread) { OPCODE_READ_PARAM_STRING_LEN(gxt, 7); // GXT labels can be max 7 character long @@ -307,7 +309,7 @@ class Text } //0ADF=2,add_dynamic_GXT_entry %1d% text %2d% - static OpcodeResult __stdcall opcode_0ADF(CRunningScript* thread) + static OpcodeResult __stdcall opcode_0ADF(CLEO::CRunningScript* thread) { OPCODE_READ_PARAM_STRING_LEN(gxt, 7); // GXT labels can be max 7 character long OPCODE_READ_PARAM_STRING(txt); @@ -317,7 +319,7 @@ class Text } //0AE0=1,remove_dynamic_GXT_entry %1d% - static OpcodeResult __stdcall opcode_0AE0(CRunningScript* thread) + static OpcodeResult __stdcall opcode_0AE0(CLEO::CRunningScript* thread) { OPCODE_READ_PARAM_STRING_LEN(gxt, 7); // GXT labels can be max 7 character long @@ -326,7 +328,7 @@ class Text } //0AED=3,%3d% = float %1d% to_string_format %2d% - static OpcodeResult __stdcall opcode_0AED(CRunningScript* thread) + static OpcodeResult __stdcall opcode_0AED(CLEO::CRunningScript* thread) { // this opcode is useless now auto val = 
OPCODE_READ_PARAM_FLOAT(); @@ -340,7 +342,7 @@ class Text } //2600=1, is_text_empty %1s% - static OpcodeResult __stdcall opcode_2600(CRunningScript* thread) + static OpcodeResult __stdcall opcode_2600(CLEO::CRunningScript* thread) { OPCODE_READ_PARAM_STRING(str); @@ -349,7 +351,7 @@ class Text } //2601=3, is_text_equal %1s% another %2s% ignore_case %3d% - static OpcodeResult __stdcall opcode_2601(CRunningScript* thread) + static OpcodeResult __stdcall opcode_2601(CLEO::CRunningScript* thread) { OPCODE_READ_PARAM_STRING(a); OPCODE_READ_PARAM_STRING(b); @@ -362,7 +364,7 @@ class Text } //2602=3, is_text_in_text %1s% sub_text %2s% ignore_case %3d% - static OpcodeResult __stdcall opcode_2602(CRunningScript* thread) + static OpcodeResult __stdcall opcode_2602(CLEO::CRunningScript* thread) { OPCODE_READ_PARAM_STRING(str); OPCODE_READ_PARAM_STRING(substr); @@ -381,7 +383,7 @@ class Text } //2603=3, is_text_prefix %1s% prefix %2s% ignore_case %3d% - static OpcodeResult __stdcall opcode_2603(CRunningScript* thread) + static OpcodeResult __stdcall opcode_2603(CLEO::CRunningScript* thread) { OPCODE_READ_PARAM_STRING(str); OPCODE_READ_PARAM_STRING(prefix); @@ -395,7 +397,7 @@ class Text } //2604=3, is_text_sufix %1s% sufix %2s% ignore_case %3d% - static OpcodeResult __stdcall opcode_2604(CRunningScript* thread) + static OpcodeResult __stdcall opcode_2604(CLEO::CRunningScript* thread) { OPCODE_READ_PARAM_STRING(str); OPCODE_READ_PARAM_STRING(sufix); @@ -416,6 +418,30 @@ class Text OPCODE_CONDITION_RESULT(result == 0); return OR_CONTINUE; } + + //2605=-1,display_text_formatted offset_left %1d% offset_top %2d% format %3d% args + static OpcodeResult __stdcall opcode_2605(CLEO::CRunningScript* thread) + { + auto posX = OPCODE_READ_PARAM_FLOAT(); + auto posY = OPCODE_READ_PARAM_FLOAT(); + OPCODE_READ_PARAM_STRING_FORMATTED(text); + + // new GXT label + // includes unprintable character, to ensure there will be no collision with user GXT lables + char gxt[8] = { 0x01, 'C', 'L', 'E', 
'O', '_', 0x01, 0x00 }; + gxt[6] += CTheScripts::NumberOfIntroTextLinesThisFrame; // unique label for each possible entry + + textManager.AddFxt(gxt, text); + + auto& draw = CTheScripts::IntroTextLines[CTheScripts::NumberOfIntroTextLinesThisFrame]; + memcpy(&draw.xPosition, &posX, sizeof(draw.xPosition)); // invalid type in Plugin SDK. Just copy memory + memcpy(&draw.yPosition, &posY, sizeof(draw.yPosition)); + strcpy(draw.gxtEntry, gxt); + + CTheScripts::NumberOfIntroTextLinesThisFrame++; + + return OR_CONTINUE; + } } textInstance; CTextManager Text::textManager; diff --git a/cleo_plugins/Text/Text.vcxproj b/cleo_plugins/Text/Text.vcxproj index 2069d0f6..c3ba88eb 100644 --- a/cleo_plugins/Text/Text.vcxproj +++ b/cleo_plugins/Text/Text.vcxproj @@ -123,6 +123,8 @@ xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" + + diff --git a/cleo_plugins/Text/Text.vcxproj.filters b/cleo_plugins/Text/Text.vcxproj.filters index 4c8f5334..81b0b82e 100644 --- a/cleo_plugins/Text/Text.vcxproj.filters +++ b/cleo_plugins/Text/Text.vcxproj.filters @@ -28,7 +28,13 @@ plugin_sdk - + + plugin_sdk + + + plugin_sdk + + plugin_sdk From e01844dbbe336c91220e980c1c9e950868188e07 Mon Sep 17 00:00:00 2001 From: Caner Karaca <37447503+CanerKaraca23@users.noreply.github.com> Date: Tue, 2 Apr 2024 19:59:36 +0300 Subject: [PATCH 139/216] Update Workflows, Add Dependabot (#52) --- .github/dependabot.yml | 11 ++++++++ .github/workflows/main.yml | 23 +++++++---------- .github/workflows/test.yml | 52 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 14 deletions(-) create mode 100644 .github/dependabot.yml create mode 100644 .github/workflows/test.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..b2a079f1 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,11 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "daily" + + - package-ecosystem: 
"gitsubmodule" + directory: "/" + schedule: + interval: "daily" \ No newline at end of file diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 491db3a2..f4d9cd70 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -7,24 +7,20 @@ on: jobs: build: - runs-on: windows-2022 - permissions: - contents: write + runs-on: windows-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 + with: + submodules: "recursive" - name: Add msbuild to PATH - uses: microsoft/setup-msbuild@v1.1 + uses: microsoft/setup-msbuild@v1 - - uses: actions/checkout@v2 + - uses: actions/setup-node@v4 with: - submodules: "true" + node-version: latest - - uses: actions/setup-node@v3 - with: - node-version: lts/* - - name: Read Version Tag id: read_version run: node.exe .github/workflows/version.js @@ -66,7 +62,6 @@ jobs: rmdir /s /q .output\Release\advanced_plugin_management_example rmdir /s /q .output\Release\scripts - - name: Convert Markdown to HTML id: md_to_html run: | @@ -81,10 +76,10 @@ jobs: path: ./.output/Release/* type: "zip" filename: ${{ steps.read_version.outputs.archive_name }} - exclusions: "*.pdb *.lib *.exp *.map" + exclusions: "*.pdb *.lib *.exp" - name: Upload Release - uses: ncipollo/release-action@v1.10.0 + uses: ncipollo/release-action@main with: token: ${{ secrets.GITHUB_TOKEN }} name: ${{ steps.read_version.outputs.version }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 00000000..1292b1ff --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,52 @@ +name: CLEO 5 Test Build + +on: + push: + paths-ignore: + - ".github/*" + - "*.md" + pull_request: + workflow_dispatch: + +jobs: + build: + runs-on: windows-latest + + steps: + - uses: actions/checkout@v4 + with: + submodules: "recursive" + + - name: Add msbuild to PATH + uses: microsoft/setup-msbuild@v1 + + - name: Build Projects + shell: cmd + run: | + set PLUGIN_SDK_DIR=%GITHUB_WORKSPACE%\third-party\plugin-sdk 
+ msbuild -m CLEO5.sln /property:Configuration=Release /property:Platform=GTASA + msbuild -m cleo_plugins/CLEO_Plugins.sln /property:Configuration=Release /property:Platform=x86 + + - name: Prepare Files + id: prepare_archive + shell: cmd + run: | + @REM create output directory + mkdir .output\Release\cleo + mkdir .output\Release\cleo\cleo_plugins + + @REM copy files + copy third-party\bass\bass.dll .output\Release\bass.dll + copy source\cleo_config.ini .output\Release\cleo\.cleo_config.ini + copy cleo_plugins\.output\*.cleo .output\Release\cleo\cleo_plugins + copy cleo_plugins\.output\*.ini .output\Release\cleo\cleo_plugins + + - uses: actions/upload-artifact@v4 + with: + compression-level: 0 + name: SA.CLEO5 + path: | + .output\Release\* + !.output\Release\*.pdb + !.output\Release\*.lib + !.output\Release\*.exp \ No newline at end of file From 5730ef2ad2802c1bd01f97be369213dd205af8a5 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Mon, 8 Apr 2024 06:54:33 +0200 Subject: [PATCH 140/216] Fixed TestCheat opcode. 
(#120) --- source/CCustomOpcodeSystem.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index c05fd2c8..e4cbc81c 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -1347,10 +1347,11 @@ namespace CLEO //0ADC=1, test_cheat %1d% OpcodeResult __stdcall opcode_0ADC(CRunningScript *thread) { - OPCODE_READ_PARAM_STRING(text); - - auto len = strlen(text); - if (_strnicmp(text, CCheat::m_CheatString, len) == 0) + OPCODE_READ_PARAM_STRING_LEN(text, sizeof(CCheat::m_CheatString)); + + _strrev(_buff_text); // reverse + auto len = strlen(_buff_text); + if (_strnicmp(_buff_text, CCheat::m_CheatString, len) == 0) { CCheat::m_CheatString[0] = '\0'; // consume the cheat SetScriptCondResult(thread, true); From d8fd71851d25211380a1b43d55b33abcd8de69b7 Mon Sep 17 00:00:00 2001 From: Seemann Date: Mon, 8 Apr 2024 08:38:12 -0400 Subject: [PATCH 141/216] Update README.md (#118) --- README.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 58018a3a..e9f17645 100644 --- a/README.md +++ b/README.md @@ -8,13 +8,14 @@ CLEO requires an 'ASI Loader' installed to run which is provided with the releas No additional files are replaced, however the following files and folders are added: - cleo\ (CLEO script directory) -- cleo\.config\sa.json (opcodes info file) +- cleo\\.config\sa.json (opcodes info file) - cleo\cleo_plugins\SA.Audio.cleo (audio playback utilities powered by BASS.dll library) - cleo\cleo_plugins\SA.DebugUtils.cleo (script debugging utilities plugin) - cleo\cleo_plugins\SA.FileSystemOperations.cleo (disk drive files related operations plugin) - cleo\cleo_plugins\SA.IniFiles.cleo (.ini config files handling plugin) - cleo\cleo_plugins\SA.IntOperations.cleo (additional math operations plugin) -- cleo\cleo_plugins\SA.MemoryOperations (memory and .dll libraries utilities plugin) +- 
cleo\cleo_plugins\SA.MemoryOperations.cleo (memory and .dll libraries utilities plugin) +- cleo\cleo_plugins\SA.Text.cleo (text processing plugin) - cleo\cleo_saves\ (CLEO save directory) - cleo\cleo_text\ (CLEO text directory) - cleo.asi (core library) @@ -36,7 +37,9 @@ CLEO scripts can be found on Grand Theft Auto fansites and modding sites such as ## Compatibility Mode -CLEO is continually being improved and extended over time. In very rare circumstances, some scripts written for CLEO 3 may not work while using CLEO 4. However, since CLEO 4.3 you are able to enable a 'legacy mode' to increase compatibility with CLEO 3 scripts by naming them with the extension '.cs3'. CLEO 4.3 will load '.cs' and '.cs4' scripts normally and load '.cs3' scripts in CLEO 3 compatibility mode, in which certain small behaviours of the CLEO library will change to achieve better compatibility with that script. However, most CLEO 3 scripts will work without the need for compatibility mode being set as CLEO 4.3 also detects certain necessary CLEO 3 behaviours. Specifically, scripts which use the uninitialized storage data after a SCM function call to work. +CLEO is continually being improved and extended over time. In very rare circumstances, new major releases may break some older scripts. To fix this, CLEO provides a 'compatibility mode' to closely emulate behavior of previous versions and improve stability of old scripts. +- To run a script with maximum compatibility with 'CLEO 4', change the script extension from `.cs` to `.cs4`. +- To run a script with maximum compatibility with 'CLEO 3', change the script extension from `.cs` to `.cs3`. ## Credits @@ -51,5 +54,5 @@ Special thanks to: - mfisto for the alpha-testing of CLEO 4, his support and advices. - 123nir for the alpha-testing of CLEO 5.0.0, troubleshooting and valuable bug reports. -The developers have no connection with Take 2 Interactive or Rockstar Games. 
+The developers are not affiliated with Take 2 Interactive or Rockstar Games. By using this product or any of the additional products included you take your own personal responsibility for any negative consequences should they arise. From fff6c121c31e31c65fc7f9d2389bf72117c12c90 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Tue, 9 Apr 2024 01:10:28 +0200 Subject: [PATCH 142/216] IntOperations turned into Math plugin (#116) IntOperations turned into Math plugin. Added bit related opcodes. --- CHANGELOG.md | 9 +- cleo_plugins/CLEO_Plugins.sln | 12 +- .../IntOperations.vcxproj.filters | 6 - .../IntOperations.cpp => Math/Math.cpp} | 132 +++++++++- .../Math.vcxproj} | 12 +- cleo_plugins/Math/Math.vcxproj.filters | 19 ++ cleo_sdk/CLEO_Utils.h | 5 +- source/CCustomOpcodeSystem.cpp | 22 -- tests/cleo_tests/0AEE.txt | 60 ----- tests/cleo_tests/Math/0AEE.txt | 25 ++ tests/cleo_tests/Math/2700.txt | 227 ++++++++++++++++++ tests/cleo_tests/Math/2701.txt | 37 +++ tests/cleo_tests/Math/2702.txt | 37 +++ tests/cleo_tests/Math/2703.txt | 87 +++++++ tests/cleo_tests/Math/2704.txt | 111 +++++++++ 15 files changed, 697 insertions(+), 104 deletions(-) delete mode 100644 cleo_plugins/IntOperations/IntOperations.vcxproj.filters rename cleo_plugins/{IntOperations/IntOperations.cpp => Math/Math.cpp} (71%) rename cleo_plugins/{IntOperations/IntOperations.vcxproj => Math/Math.vcxproj} (95%) create mode 100644 cleo_plugins/Math/Math.vcxproj.filters delete mode 100644 tests/cleo_tests/0AEE.txt create mode 100644 tests/cleo_tests/Math/0AEE.txt create mode 100644 tests/cleo_tests/Math/2700.txt create mode 100644 tests/cleo_tests/Math/2701.txt create mode 100644 tests/cleo_tests/Math/2702.txt create mode 100644 tests/cleo_tests/Math/2703.txt create mode 100644 tests/cleo_tests/Math/2704.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index 57a994be..9d6fdf0a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,13 +36,20 @@ - new opcode **2302 
([write_block_to_file](https://library.sannybuilder.com/#/sa/file/2302))** - new opcode **2303 ([resolve_filepath](https://library.sannybuilder.com/#/sa/file/2303))** - new opcode **2304 ([get_script_filename](https://library.sannybuilder.com/#/sa/file/2304))** +- new [Math](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/Math) plugin + - math related opcodes moved from CLEO core into separated plugin + - new opcode **2700 ([is_bit_set](https://library.sannybuilder.com/#/sa/math/2700))** + - new opcode **2701 ([set_bit](https://library.sannybuilder.com/#/sa/math/2701))** + - new opcode **2702 ([clear_bit](https://library.sannybuilder.com/#/sa/math/2702))** + - new opcode **2703 ([toggle_bit](https://library.sannybuilder.com/#/sa/math/2703))** + - new opcode **2704 ([is_truthy](https://library.sannybuilder.com/#/sa/math/2704))** - new [MemoryOperations](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/MemoryOperations) plugin - memory related opcodes moved from CLEO core into separated plugin - validation of input and output parameters for all opcodes - opcode **0A8C ([write_memory](https://library.sannybuilder.com/#/sa/memory/0A8C))** now supports strings - new opcode **2400 ([copy_memory](https://library.sannybuilder.com/#/sa/memory/2400))** - new opcode **2401 ([read_memory_with_offset](https://library.sannybuilder.com/#/sa/memory/2401))** - - new opcode **2402 ([writememory_with_offset](https://library.sannybuilder.com/#/sa/memory/2402))** + - new opcode **2402 ([write_memory_with_offset](https://library.sannybuilder.com/#/sa/memory/2402))** - new opcode **2403 ([forget_memory](https://library.sannybuilder.com/#/sa/memory/2403))** - new opcode **2404 ([get_script_struct_just_created](https://library.sannybuilder.com/#/sa/memory/2404))** - new opcode **2405 ([is_script_running](https://library.sannybuilder.com/#/sa/memory/2405))** diff --git a/cleo_plugins/CLEO_Plugins.sln b/cleo_plugins/CLEO_Plugins.sln index ed6aac0f..ee05f7ed 100644 
--- a/cleo_plugins/CLEO_Plugins.sln +++ b/cleo_plugins/CLEO_Plugins.sln @@ -7,8 +7,6 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "FileSystemOperations", "Fil EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "IniFiles", "IniFiles\IniFiles.vcxproj", "{6831362D-5226-4634-9DB4-266A1B6C3E6C}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "IntOperations", "IntOperations\IntOperations.vcxproj", "{68A434CF-6390-4FDF-9A15-36A8A9ECEAA9}" -EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DebugUtils", "DebugUtils\DebugUtils.vcxproj", "{481896C4-0C19-4992-9602-729537774B32}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MemoryOperations", "MemoryOperations\MemoryOperations.vcxproj", "{35C80F79-8B18-4925-8C32-94B320DBE76F}" @@ -17,6 +15,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Audio", "Audio\Audio.vcxpro EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Text", "Text\Text.vcxproj", "{BD19AEFD-626B-40AE-8D83-6D444D2EFBF8}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Math", "Math\Math.vcxproj", "{68A434CF-6390-4FDF-9A15-36A8A9ECEAA9}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|x86 = Debug|x86 @@ -31,10 +31,6 @@ Global {6831362D-5226-4634-9DB4-266A1B6C3E6C}.Debug|x86.Build.0 = Debug|Win32 {6831362D-5226-4634-9DB4-266A1B6C3E6C}.Release|x86.ActiveCfg = Release|Win32 {6831362D-5226-4634-9DB4-266A1B6C3E6C}.Release|x86.Build.0 = Release|Win32 - {68A434CF-6390-4FDF-9A15-36A8A9ECEAA9}.Debug|x86.ActiveCfg = Debug|Win32 - {68A434CF-6390-4FDF-9A15-36A8A9ECEAA9}.Debug|x86.Build.0 = Debug|Win32 - {68A434CF-6390-4FDF-9A15-36A8A9ECEAA9}.Release|x86.ActiveCfg = Release|Win32 - {68A434CF-6390-4FDF-9A15-36A8A9ECEAA9}.Release|x86.Build.0 = Release|Win32 {481896C4-0C19-4992-9602-729537774B32}.Debug|x86.ActiveCfg = Debug|Win32 {481896C4-0C19-4992-9602-729537774B32}.Debug|x86.Build.0 = Debug|Win32 
{481896C4-0C19-4992-9602-729537774B32}.Release|x86.ActiveCfg = Release|Win32 @@ -51,6 +47,10 @@ Global {BD19AEFD-626B-40AE-8D83-6D444D2EFBF8}.Debug|x86.Build.0 = Debug|Win32 {BD19AEFD-626B-40AE-8D83-6D444D2EFBF8}.Release|x86.ActiveCfg = Release|Win32 {BD19AEFD-626B-40AE-8D83-6D444D2EFBF8}.Release|x86.Build.0 = Release|Win32 + {68A434CF-6390-4FDF-9A15-36A8A9ECEAA9}.Debug|x86.ActiveCfg = Debug|Win32 + {68A434CF-6390-4FDF-9A15-36A8A9ECEAA9}.Debug|x86.Build.0 = Debug|Win32 + {68A434CF-6390-4FDF-9A15-36A8A9ECEAA9}.Release|x86.ActiveCfg = Release|Win32 + {68A434CF-6390-4FDF-9A15-36A8A9ECEAA9}.Release|x86.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/cleo_plugins/IntOperations/IntOperations.vcxproj.filters b/cleo_plugins/IntOperations/IntOperations.vcxproj.filters deleted file mode 100644 index 2d75d234..00000000 --- a/cleo_plugins/IntOperations/IntOperations.vcxproj.filters +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/cleo_plugins/IntOperations/IntOperations.cpp b/cleo_plugins/Math/Math.cpp similarity index 71% rename from cleo_plugins/IntOperations/IntOperations.cpp rename to cleo_plugins/Math/Math.cpp index de0ce001..084a807d 100644 --- a/cleo_plugins/IntOperations/IntOperations.cpp +++ b/cleo_plugins/Math/Math.cpp @@ -5,10 +5,10 @@ using namespace CLEO; using namespace plugin; -class IntOperations +class Math { public: - IntOperations() + Math() { auto cleoVer = CLEO_GetVersion(); if (cleoVer < CLEO_VERSION) @@ -18,12 +18,15 @@ class IntOperations return; } - //register opcodes + // register opcodes CLEO_RegisterOpcode(0x0A8E, opcode_0A8E); // x = a + b (int) CLEO_RegisterOpcode(0x0A8F, opcode_0A8F); // x = a - b (int) CLEO_RegisterOpcode(0x0A90, opcode_0A90); // x = a * b (int) CLEO_RegisterOpcode(0x0A91, opcode_0A91); // x = a / b (int) + CLEO_RegisterOpcode(0x0AEE, opcode_0AEE); // pow + CLEO_RegisterOpcode(0x0AEF, opcode_0AEF); // log + 
CLEO_RegisterOpcode(0x0B10, Script_IntOp_AND); CLEO_RegisterOpcode(0x0B11, Script_IntOp_OR); CLEO_RegisterOpcode(0x0B12, Script_IntOp_XOR); @@ -39,6 +42,12 @@ class IntOperations CLEO_RegisterOpcode(0x0B1C, Scr_IntOp_SHR); CLEO_RegisterOpcode(0x0B1D, Scr_IntOp_SHL); CLEO_RegisterOpcode(0x0B1E, Sign_Extend); + + CLEO_RegisterOpcode(0x2700, opcode_2700); // is_bit_set + CLEO_RegisterOpcode(0x2701, opcode_2701); // set_bit + CLEO_RegisterOpcode(0x2702, opcode_2702); // clear_bit + CLEO_RegisterOpcode(0x2703, opcode_2703); // toggle_bit + CLEO_RegisterOpcode(0x2704, opcode_2704); // is_truthy } //0A8E=3,%3d% = %1d% + %2d% ; int @@ -89,6 +98,30 @@ class IntOperations return OR_CONTINUE; } + //0AEE=3,%3d% = %1d% exp %2d% // all floats + static OpcodeResult __stdcall opcode_0AEE(CRunningScript* thread) + { + auto base = OPCODE_READ_PARAM_FLOAT(); + auto exponent = OPCODE_READ_PARAM_FLOAT(); + + auto result = (float)pow(base, exponent); + + OPCODE_WRITE_PARAM_FLOAT(result); + return OR_CONTINUE; + } + + //0AEF=3,%3d% = log %1d% base %2d% // all floats + static OpcodeResult __stdcall opcode_0AEF(CRunningScript* thread) + { + auto argument = OPCODE_READ_PARAM_FLOAT(); + auto base = OPCODE_READ_PARAM_FLOAT(); + + auto exponent = log(argument) / log(base); + + OPCODE_WRITE_PARAM_FLOAT(exponent); + return OR_CONTINUE; + } + static OpcodeResult WINAPI Script_IntOp_AND(CScriptThread* thread) /**************************************************************** Opcode Format @@ -310,4 +343,95 @@ class IntOperations return OR_CONTINUE; } -} intOperations; + + //2700=2, is_bit_set value %1d% bit_index %2d% + static OpcodeResult WINAPI opcode_2700(CScriptThread* thread) + { + auto value = OPCODE_READ_PARAM_UINT(); + auto bitIndex = OPCODE_READ_PARAM_INT(); + + if (bitIndex < 0 || bitIndex > 31) + { + SHOW_ERROR("Invalid '%d' bit index argument in script %s\nScript suspended.", bitIndex, ScriptInfoStr(thread).c_str()); + return thread->Suspend(); + } + + bool result = (value >> bitIndex) 
& 1; + + OPCODE_CONDITION_RESULT(result); + return OR_CONTINUE; + } + + //2701=2,set_bit value %1d% bit_index %2d% + static OpcodeResult WINAPI opcode_2701(CScriptThread* thread) + { + auto value = OPCODE_READ_PARAM_OUTPUT_VAR_INT(); + auto bitIndex = OPCODE_READ_PARAM_INT(); + + if (bitIndex < 0 || bitIndex > 31) + { + SHOW_ERROR("Invalid '%d' bit index argument in script %s\nScript suspended.", bitIndex, ScriptInfoStr(thread).c_str()); + return thread->Suspend(); + } + + *value |= 1 << bitIndex; + + return OR_CONTINUE; + } + + //2702=2,clear_bit value %1d% bit_index %2d% + static OpcodeResult WINAPI opcode_2702(CScriptThread* thread) + { + auto value = OPCODE_READ_PARAM_OUTPUT_VAR_INT(); + auto bitIndex = OPCODE_READ_PARAM_INT(); + + if (bitIndex < 0 || bitIndex > 31) + { + SHOW_ERROR("Invalid '%d' bit index argument in script %s\nScript suspended.", bitIndex, ScriptInfoStr(thread).c_str()); + return thread->Suspend(); + } + + *value &= ~(1 << bitIndex); + + return OR_CONTINUE; + } + + //2703=3,toggle_bit value %1d% bit_index %2d% state %3d% + static OpcodeResult WINAPI opcode_2703(CScriptThread* thread) + { + auto value = OPCODE_READ_PARAM_OUTPUT_VAR_INT(); + auto bitIndex = OPCODE_READ_PARAM_INT(); + auto state = OPCODE_READ_PARAM_BOOL(); + + if (bitIndex < 0 || bitIndex > 31) + { + SHOW_ERROR("Invalid '%d' bit index argument in script %s\nScript suspended.", bitIndex, ScriptInfoStr(thread).c_str()); + return thread->Suspend(); + } + + DWORD flag = 1 << bitIndex; + if (state) + *value |= flag; + else + *value &= ~flag; + + return OR_CONTINUE; + } + + //2704=1, is_truthy value %1d% + static OpcodeResult WINAPI opcode_2704(CScriptThread* thread) + { + auto paramType = OPCODE_PEEK_PARAM_TYPE(); + + if(IsImmString(paramType) || IsVarString(paramType)) + { + OPCODE_READ_PARAM_STRING_LEN(text, 1); // one character is all we need + OPCODE_CONDITION_RESULT(text[0] != '\0'); + return OR_CONTINUE; + } + + auto value = OPCODE_READ_PARAM_ANY32(); + 
OPCODE_CONDITION_RESULT(value != 0); + return OR_CONTINUE; + } +} Math; diff --git a/cleo_plugins/IntOperations/IntOperations.vcxproj b/cleo_plugins/Math/Math.vcxproj similarity index 95% rename from cleo_plugins/IntOperations/IntOperations.vcxproj rename to cleo_plugins/Math/Math.vcxproj index 83dd2abd..98dc5acf 100644 --- a/cleo_plugins/IntOperations/IntOperations.vcxproj +++ b/cleo_plugins/Math/Math.vcxproj @@ -14,7 +14,7 @@ {68A434CF-6390-4FDF-9A15-36A8A9ECEAA9} true Win32Proj - IntOperations + Math 10.0 @@ -44,13 +44,13 @@ $(SolutionDir).output\ $(ProjectDir).obj\$(Configuration)\ - SA.IntOperations + SA.Math .cleo $(SolutionDir).output\ $(ProjectDir).obj\$(Configuration)\ - SA.IntOperations + SA.Math .cleo @@ -114,7 +114,11 @@ xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" - + + + + + diff --git a/cleo_plugins/Math/Math.vcxproj.filters b/cleo_plugins/Math/Math.vcxproj.filters new file mode 100644 index 00000000..7b248992 --- /dev/null +++ b/cleo_plugins/Math/Math.vcxproj.filters @@ -0,0 +1,19 @@ + + + + + + + + {20ddb375-f549-46bb-814d-53e534880d23} + + + + + cleo_sdk + + + cleo_sdk + + + \ No newline at end of file diff --git a/cleo_sdk/CLEO_Utils.h b/cleo_sdk/CLEO_Utils.h index e69e066e..239b6f0f 100644 --- a/cleo_sdk/CLEO_Utils.h +++ b/cleo_sdk/CLEO_Utils.h @@ -21,6 +21,8 @@ namespace CLEO OPCODE_CONDITION_RESULT(value) // set result OPCODE_SKIP_PARAMS(count) // ignore X params + + OPCODE_PEEK_PARAM_TYPE() // get param type without advancing the script // reading opcode input arguments OPCODE_READ_PARAM_BOOL() @@ -458,11 +460,12 @@ namespace CLEO } #define OPCODE_SKIP_PARAMS(_count) CLEO_SkipOpcodeParams(thread, _count) + #define OPCODE_PEEK_PARAM_TYPE() thread->PeekDataType() // macros for reading opcode input params. Performs type validation, throws error and suspends script if user provided invalid argument type // TOD: add range checks for limited size types? 
- #define OPCODE_READ_PARAM_BOOL() _readParam(thread).bParam; \ + #define OPCODE_READ_PARAM_BOOL() _readParam(thread).dwParam != false; \ if (!_paramWasInt()) { SHOW_ERROR("Input argument %s expected to be integer, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_READ_PARAM_INT8() _readParam(thread).cParam; \ diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index e4cbc81c..db863379 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -55,8 +55,6 @@ namespace CLEO OpcodeResult __stdcall opcode_0AE2(CRunningScript* thread); // get_random_car_in_sphere_no_save_recursive OpcodeResult __stdcall opcode_0AE3(CRunningScript* thread); // get_random_object_in_sphere_no_save_recursive - OpcodeResult __stdcall opcode_0AEE(CRunningScript* thread); // pow - OpcodeResult __stdcall opcode_0AEF(CRunningScript* thread); // log OpcodeResult __stdcall opcode_0DD5(CRunningScript* thread); // get_platform // 2000 free slot // 2001 free slot @@ -246,8 +244,6 @@ namespace CLEO CLEO_RegisterOpcode(0x0AE1, opcode_0AE1); CLEO_RegisterOpcode(0x0AE2, opcode_0AE2); CLEO_RegisterOpcode(0x0AE3, opcode_0AE3); - CLEO_RegisterOpcode(0x0AEE, opcode_0AEE); - CLEO_RegisterOpcode(0x0AEF, opcode_0AEF); CLEO_RegisterOpcode(0x0DD5, opcode_0DD5); // get_platform @@ -1517,24 +1513,6 @@ namespace CLEO return OR_CONTINUE; } - //0AEE=3,%3d% = %1d% exp %2d% //all floats - OpcodeResult __stdcall opcode_0AEE(CRunningScript *thread) - { - float base, arg; - *thread >> base >> arg; - *thread << (float)pow(base, arg); - return OR_CONTINUE; - } - - //0AEF=3,%3d% = log %1d% base %2d% //all floats - OpcodeResult __stdcall opcode_0AEF(CRunningScript *thread) - { - float base, arg; - *thread >> arg >> base; - *thread << (float)(log(arg) / log(base)); - return OR_CONTINUE; - } - //0DD5=1,%1d% = get_platform 
OpcodeResult __stdcall opcode_0DD5(CRunningScript* thread) { diff --git a/tests/cleo_tests/0AEE.txt b/tests/cleo_tests/0AEE.txt deleted file mode 100644 index f1f10999..00000000 --- a/tests/cleo_tests/0AEE.txt +++ /dev/null @@ -1,60 +0,0 @@ -{$CLEO .s} -{$USE debug} -{$USE file} -{$USE bitwise} -var 0@ : Integer -var 1@ : Integer -var 2@ : Integer -var 3@ : Integer -var 4@ : Integer -var 5@ : Integer -var 6@ : Integer -var 7@ : Integer -var 8@ : Integer -var 9@ : Integer -var 10@ : Integer - -script_name "0AEE" // pow -debug_on - -trace "0A9A (pow)" - - -// perform 3^4 -wait 0 -0AEE: pow number 3.0 power 4.0 result 0@ // tested opcode -if - 0@ == 81.0 -then - trace "~g~~h~~h~0AEE (pow), #0 PASSED" -else - breakpoint "~r~~h~~h~~h~0AEE (pow), #0 FAILED!~n~%f Expected~n~%f Occured" 81.0 0@ -end - - -// perform 1.2^3.4 -wait 0 -0AEE: pow number 1.2 power 3.4 result 0@ // tested opcode -if and - 0@ > 1.8 - 0@ < 1.9 -then - trace "~g~~h~~h~0AEE (pow), #1 PASSED" -else - breakpoint "~r~~h~~h~~h~0AEE (pow), #1 FAILED!~n~%f Expected~n~%f Occured" 1.858 0@ -end - - -// perform 3.1415^0.0 -wait 0 -0AEE: pow number 3.1415 power 0.0 result 0@ // tested opcode -if - 0@ == 1.0 -then - trace "~g~~h~~h~0AEE (pow), #2 PASSED" -else - breakpoint "~r~~h~~h~~h~0AEE (pow), #2 FAILED!~n~%f Expected~n~%f Occured" 1.0 0@ -end - - -terminate_this_custom_script diff --git a/tests/cleo_tests/Math/0AEE.txt b/tests/cleo_tests/Math/0AEE.txt new file mode 100644 index 00000000..5ff58e58 --- /dev/null +++ b/tests/cleo_tests/Math/0AEE.txt @@ -0,0 +1,25 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name '0AEE' +test("0AEE (pow)", tests) +terminate_this_custom_script + +function tests + it("should power numbers", test1) + return + + function test1 + 0AEE: pow {number} 3.0 {power} 4.0 {var_result} 0@ + assert_eqf(0@, 81.0) + + 0AEE: pow {number} 16.0 {power} 0.25 {var_result} 0@ + assert_eqf(0@, 2.0) + + 0AEE: pow {number} 3.1415 {power} 0.0 {var_result} 0@ + assert_eqf(0@, 1.0) + + 
0AEE: pow {number} 3.0 {power} 1.0 {var_result} 0@ + assert_eqf(0@, 3.0) + end +end diff --git a/tests/cleo_tests/Math/2700.txt b/tests/cleo_tests/Math/2700.txt new file mode 100644 index 00000000..b9ef22ad --- /dev/null +++ b/tests/cleo_tests/Math/2700.txt @@ -0,0 +1,227 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name '2700' +test("2700 (is_bit_set)", tests) +terminate_this_custom_script + +function tests + it("should has no bit set", test1) + it("should has all bits set", test2) + it("should test some bits", test3) + return + + function test1 + 0@ = 0x00000000 + + 2700: is_bit_set {number} 0@ {bitIndex} 0 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 1 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 2 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 3 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 4 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 5 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 6 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 7 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 8 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 9 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 10 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 11 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 12 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 13 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 14 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 15 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 16 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 17 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 18 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 19 + assert_result_false() + 
+ 2700: is_bit_set {number} 0@ {bitIndex} 20 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 21 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 22 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 23 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 24 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 25 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 26 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 27 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 28 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 29 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 30 + assert_result_false() + + 2700: is_bit_set {number} 0@ {bitIndex} 31 + assert_result_false() + end + + function test2 + 0@ = 0xFFFFFFFF + + 2700: is_bit_set {number} 0@ {bitIndex} 0 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 1 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 2 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 3 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 4 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 5 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 6 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 7 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 8 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 9 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 10 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 11 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 12 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 13 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 14 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 15 + 
assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 16 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 17 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 18 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 19 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 20 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 21 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 22 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 23 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 24 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 25 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 26 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 27 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 28 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 29 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 30 + assert_result_true() + + 2700: is_bit_set {number} 0@ {bitIndex} 31 + assert_result_true() + end + + function test3 + 2700: is_bit_set {number} 0x00000001 {bitIndex} 0 + assert_result_true() + + 2700: is_bit_set {number} 0x00000004 {bitIndex} 2 + assert_result_true() + + 2700: is_bit_set {number} 0x00004000 {bitIndex} 14 + assert_result_true() + + 2700: is_bit_set {number} 0x80000000 {bitIndex} 31 + assert_result_true() + end +end diff --git a/tests/cleo_tests/Math/2701.txt b/tests/cleo_tests/Math/2701.txt new file mode 100644 index 00000000..492f2814 --- /dev/null +++ b/tests/cleo_tests/Math/2701.txt @@ -0,0 +1,37 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name '2701' +test("2701 (set_bit)", tests) +terminate_this_custom_script + +function tests + it("should set bits", test1) + return + + function test1 + 0@ = 0x12345678 + 2701: set_bit {var_number} 0@ {bitIndex} 0 + assert_eq(0@, 0x12345679) + + 0@ = 0x12345678 + 
2701: set_bit {var_number} 0@ {bitIndex} 3 // already set + assert_eq(0@, 0x12345678) + + 0@ = 0x12345678 + 2701: set_bit {var_number} 0@ {bitIndex} 13 + assert_eq(0@, 0x12347678) + + 0@ = 0x12345678 + 2701: set_bit {var_number} 0@ {bitIndex} 14 // already set + assert_eq(0@, 0x12345678) + + 0@ = 0x12345678 + 2701: set_bit {var_number} 0@ {bitIndex} 28 // already set + assert_eq(0@, 0x12345678) + + 0@ = 0x12345678 + 2701: set_bit {var_number} 0@ {bitIndex} 31 + assert_eq(0@, 0x92345678) + end +end diff --git a/tests/cleo_tests/Math/2702.txt b/tests/cleo_tests/Math/2702.txt new file mode 100644 index 00000000..5986d426 --- /dev/null +++ b/tests/cleo_tests/Math/2702.txt @@ -0,0 +1,37 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name '2702' +test("2702 (clear_bit)", tests) +terminate_this_custom_script + +function tests + it("should clear bits", test1) + return + + function test1 + 0@ = 0x12345678 + 2702: clear_bit {var_number} 0@ {bitIndex} 0 // was not set + assert_eq(0@, 0x12345678) + + 0@ = 0x12345678 + 2702: clear_bit {var_number} 0@ {bitIndex} 3 + assert_eq(0@, 0x12345670) + + 0@ = 0x12345678 + 2702: clear_bit {var_number} 0@ {bitIndex} 13 // was not set + assert_eq(0@, 0x12345678) + + 0@ = 0x12345678 + 2702: clear_bit {var_number} 0@ {bitIndex} 14 + assert_eq(0@, 0x12341678) + + 0@ = 0x12345678 + 2702: clear_bit {var_number} 0@ {bitIndex} 28 + assert_eq(0@, 0x02345678) + + 0@ = 0x12345678 + 2702: clear_bit {var_number} 0@ {bitIndex} 31 // was not set + assert_eq(0@, 0x12345678) + end +end diff --git a/tests/cleo_tests/Math/2703.txt b/tests/cleo_tests/Math/2703.txt new file mode 100644 index 00000000..7d19d440 --- /dev/null +++ b/tests/cleo_tests/Math/2703.txt @@ -0,0 +1,87 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name '2703' +test("2703 (toggle_bit)", tests) +terminate_this_custom_script + +function tests + it("should toggle bits", test1) + it("should handle numbers as bool", test2) + return + + function test1 + 0@ = 
0x12345678 + 2703: toggle_bit {var_number} 0@ {bitIndex} 0 {state} true + assert_eq(0@, 0x12345679) + + 0@ = 0x12345678 + 2703: toggle_bit {var_number} 0@ {bitIndex} 0 {state} false // was not set + assert_eq(0@, 0x12345678) + + 0@ = 0x12345678 + 2703: toggle_bit {var_number} 0@ {bitIndex} 3 {state} true // already set + assert_eq(0@, 0x12345678) + + 0@ = 0x12345678 + 2703: toggle_bit {var_number} 0@ {bitIndex} 3 {state} false + assert_eq(0@, 0x12345670) + + 0@ = 0x12345678 + 2703: toggle_bit {var_number} 0@ {bitIndex} 13 {state} true + assert_eq(0@, 0x12347678) + + 0@ = 0x12345678 + 2703: toggle_bit {var_number} 0@ {bitIndex} 13 {state} false // was not set + assert_eq(0@, 0x12345678) + + 0@ = 0x12345678 + 2703: toggle_bit {var_number} 0@ {bitIndex} 14 {state} true // already set + assert_eq(0@, 0x12345678) + + 0@ = 0x12345678 + 2703: toggle_bit {var_number} 0@ {bitIndex} 14 {state} false + assert_eq(0@, 0x12341678) + + 0@ = 0x12345678 + 2703: toggle_bit {var_number} 0@ {bitIndex} 28 {state} true // already set + assert_eq(0@, 0x12345678) + + 0@ = 0x12345678 + 2703: toggle_bit {var_number} 0@ {bitIndex} 28 {state} false + assert_eq(0@, 0x02345678) + + 0@ = 0x12345678 + 2703: toggle_bit {var_number} 0@ {bitIndex} 31 {state} true + assert_eq(0@, 0x92345678) + + 0@ = 0x12345678 + 2703: toggle_bit {var_number} 0@ {bitIndex} 31 {state} false // was not set + assert_eq(0@, 0x12345678) + end + + function test2 + 0@ = 0xF + 2703: toggle_bit {var_number} 0@ {bitIndex} 0 {state} 0 + assert_eq(0@, 0xE) + + 0@ = 0x0 + 2703: toggle_bit {var_number} 0@ {bitIndex} 0 {state} 1 + assert_eq(0@, 0x1) + + 0@ = 0x0 + 2703: toggle_bit {var_number} 0@ {bitIndex} 0 {state} 2 + assert_eq(0@, 0x1) + + 0@ = 0x0 + 2703: toggle_bit {var_number} 0@ {bitIndex} 0 {state} -1 + assert_eq(0@, 0x1) + + 0@ = 0x0 + 2703: toggle_bit {var_number} 0@ {bitIndex} 0 {state} 0x00010000 + assert_eq(0@, 0x1) + + 0@ = 0x0 + 2703: toggle_bit {var_number} 0@ {bitIndex} 0 {state} 0x10000000 + assert_eq(0@, 0x1) 
+end diff --git a/tests/cleo_tests/Math/2704.txt b/tests/cleo_tests/Math/2704.txt new file mode 100644 index 00000000..0c1fcfdb --- /dev/null +++ b/tests/cleo_tests/Math/2704.txt @@ -0,0 +1,111 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name '2704' +test("2704 (is_truthy)", tests) +terminate_this_custom_script + +function tests + it("should be false", test1) + it("should be true", test2) + return + + function test1 + // int + 2704: is_truthy {value} 0 + assert_result_false() + + 0@ = 0 + 2704: is_truthy {value} 0@ + assert_result_false() + + // float + 2704: is_truthy {value} 0.0 + assert_result_false() + + 0@ = 0.0 + 2704: is_truthy {value} 0@ + assert_result_false() + + // short string + 2704: is_truthy {value} '' + assert_result_false() + + 0@s = '' + 2704: is_truthy {value} 0@s + assert_result_false() + + // long string + 2704: is_truthy {value} "" + assert_result_false() + + 0@v = "" + 2704: is_truthy {value} 0@v + assert_result_false() + end + + function test2 + // int + 2704: is_truthy {value} 1 + assert_result_true() + + 0@ = 1 + 2704: is_truthy {value} 0@ + assert_result_true() + + 2704: is_truthy {value} 0x00001000 + assert_result_true() + + 2704: is_truthy {value} -1 + assert_result_true() + + // float + 0@ = 0.000001 + 2704: is_truthy {value} 0@ + assert_result_true() + + 2704: is_truthy {value} 1.0 + assert_result_true() + + 0@ = 0.000001 + 2704: is_truthy {value} 0@ + assert_result_true() + + 2704: is_truthy {value} 0.000001 + assert_result_true() + + 2704: is_truthy {value} -0.0 // it is 0x80000000 + assert_result_true() + + // short string + 2704: is_truthy {value} 'a' + assert_result_true() + + 0@s = 'a' + 2704: is_truthy {value} 0@s + assert_result_true() + + 2704: is_truthy {value} ' ' + assert_result_true() + + 2704: is_truthy {value} 'null' + assert_result_true() + + // long string + 2704: is_truthy {value} "a" + assert_result_true() + + 0@v = "a" + 2704: is_truthy {value} 0@v + assert_result_true() + + 2704: is_truthy {value} 
" " + assert_result_true() + + 2704: is_truthy {value} "null" + assert_result_true() + + 2704: is_truthy {value} "some very long testing string" + assert_result_true() + end +end From e24e326bd417dba744891ed9d39d3e448cfc5886 Mon Sep 17 00:00:00 2001 From: Seemann Date: Mon, 8 Apr 2024 20:10:18 -0400 Subject: [PATCH 143/216] Update CModuleSystem.cpp (#121) --- source/CModuleSystem.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/source/CModuleSystem.cpp b/source/CModuleSystem.cpp index 6b99a18c..0e7d5597 100644 --- a/source/CModuleSystem.cpp +++ b/source/CModuleSystem.cpp @@ -417,6 +417,13 @@ bool CModuleSystem::CModule::ModuleExport::LoadFromFile(std::ifstream& file) return false; } + // skip flags (1 byte) and address (4 bytes) + file.seekg(5, file.cur); + if (file.fail()) + { + return false; + } + return true; // done } From 3535844057fa2c475eeff8e0666b183fe98097b8 Mon Sep 17 00:00:00 2001 From: Seemann Date: Mon, 8 Apr 2024 20:10:42 -0400 Subject: [PATCH 144/216] release with multiple asi loader versions (#119) --- .github/workflows/main.yml | 61 +++++++++++++++++++++++++++++------- .github/workflows/version.js | 16 ++++++++-- CHANGELOG.md | 59 ++++++++++++++++++---------------- README.md | 12 +++---- 4 files changed, 102 insertions(+), 46 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f4d9cd70..7092a94c 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -32,7 +32,7 @@ jobs: msbuild -m CLEO5.sln /property:Configuration=Release /property:Platform=GTASA msbuild -m cleo_plugins/CLEO_Plugins.sln /property:Configuration=Release /property:Platform=x86 - - name: Prepare Files + - name: Prepare Base Files id: prepare_archive shell: cmd run: | @@ -51,17 +51,13 @@ jobs: copy cleo_plugins\.output\*.ini .output\Release\cleo\cleo_plugins copy cleo_plugins\Audio\bass\bass.dll .output\Release\bass.dll xcopy /E /I tests .output\Release\cleo + + @REM copy SDK + copy .output\Release\CLEO.lib 
cleo_sdk\CLEO.lib @REM download Sanny Builder Library json curl https://raw.githubusercontent.com/sannybuilder/library/master/sa/sa.json -o .output\Release\cleo\.config\sa.json - @REM install Silent's ASI Loader - curl https://silent.rockstarvision.com/uploads/silents_asi_loader_13.zip -o silents_asi_loader_13.zip - powershell.exe -NoP -NonI -Command "Expand-Archive '.\silents_asi_loader_13.zip' '.\.output\Release'" - move .output\Release\ReadMe.txt ".output\Release\cleo_readme\ASI Loader Readme.txt" - rmdir /s /q .output\Release\advanced_plugin_management_example - rmdir /s /q .output\Release\scripts - - name: Convert Markdown to HTML id: md_to_html run: | @@ -70,14 +66,57 @@ jobs: move README.html .output\Release\cleo_readme\README.html move CHANGELOG.html .output\Release\cleo_readme\CHANGELOG.html - - name: Pack binaries (Main) + - name: Download ASI Loaders + id: download_asi_loaders + shell: cmd + run: | + xcopy /E /I .output\Release .output\Release_with_Silent_ASI_Loader + xcopy /E /I .output\Release .output\Release_with_Ultimate_ASI_Loader + + @REM install Silent's ASI Loader + curl https://silent.rockstarvision.com/uploads/silents_asi_loader_13.zip -o silents_asi_loader_13.zip + powershell.exe -NoP -NonI -Command "Expand-Archive '.\silents_asi_loader_13.zip' '.\.output\Release_with_Silent_ASI_Loader'" + move .output\Release_with_Silent_ASI_Loader\ReadMe.txt ".output\Release_with_Silent_ASI_Loader\cleo_readme\ASI Loader Readme.txt" + rmdir /s /q .output\Release_with_Silent_ASI_Loader\advanced_plugin_management_example + rmdir /s /q .output\Release_with_Silent_ASI_Loader\scripts + + @REM install Ultimate ASI Loader + curl https://github.com/ThirteenAG/Ultimate-ASI-Loader/releases/download/Win32-latest/vorbisFile-Win32.zip -L -o ual.zip + powershell.exe -NoP -NonI -Command "Expand-Archive '.\ual.zip' '.\.output\Release_with_Ultimate_ASI_Loader'" + rm .\.output\Release_with_Ultimate_ASI_Loader\vorbisFile-Win32.SHA512 + + - name: Pack Base Archive uses: 
ThirteenAG/zip-release@master with: path: ./.output/Release/* type: "zip" - filename: ${{ steps.read_version.outputs.archive_name }} + filename: SA.CLEO_${{ github.ref_name }}.zip + exclusions: "*.pdb *.lib *.exp" + + - name: Pack Base + Silent's ASI Loader + uses: ThirteenAG/zip-release@master + with: + path: ./.output/Release_with_Silent_ASI_Loader/* + type: "zip" + filename: SA.CLEO_${{ github.ref_name }}+Silent_ASI_Loader.zip + exclusions: "*.pdb *.lib *.exp" + + - name: Pack Base + UAL + uses: ThirteenAG/zip-release@master + with: + path: ./.output/Release_with_Ultimate_ASI_Loader/* + type: "zip" + filename: SA.CLEO_${{ github.ref_name }}+Ultimate_ASI_Loader.zip exclusions: "*.pdb *.lib *.exp" + - name: CLEO SDK + uses: ThirteenAG/zip-release@master + with: + path: ./cleo_sdk/* + type: "zip" + filename: SA.CLEO_${{ github.ref_name }}_SDK.zip + exclusions: "*.pdb *.exp" + - name: Upload Release uses: ncipollo/release-action@main with: @@ -86,4 +125,4 @@ jobs: bodyFile: 'changes.txt' # generated in read_version tag: ${{ github.ref_name }} prerelease: ${{ contains(github.ref_name, 'beta') || contains(github.ref_name, 'alpha') }} - artifacts: ${{ steps.read_version.outputs.archive_name }} + artifacts: "SA.CLEO_*.zip" diff --git a/.github/workflows/version.js b/.github/workflows/version.js index 67613633..0063d40a 100644 --- a/.github/workflows/version.js +++ b/.github/workflows/version.js @@ -5,7 +5,6 @@ const { GITHUB_OUTPUT, GITHUB_REF_NAME } = process.env; if (GITHUB_REF_NAME) { const version = GITHUB_REF_NAME.startsWith("v") ? 
GITHUB_REF_NAME.slice(1) : GITHUB_REF_NAME; addOutput("version", version); - addOutput("archive_name", `SA.CLEO_${GITHUB_REF_NAME}.zip`); // update cleo.h to replace version const cleoH = readFileSync("cleo_sdk/cleo.h", { encoding: "utf-8" }); @@ -29,7 +28,20 @@ function addOutput(key, value) { function getChanges() { const lines = changelog.split(EOL); - const result = []; + const result = [ + `## Download Instructions`, + `An ASI loader is required for CLEO 5 to work. CLEO 5 comes pre-packaged with several popular ASI Loaders ([Silent's ASI Loader](https://cookieplmonster.github.io/mods/gta-sa/#asiloader) and [Ultimate ASI Loader](https://github.com/ThirteenAG/Ultimate-ASI-Loader)).`, + `#### If you don't have an ASI loader installed already or unsure which one to download:`, + `- Download [CLEO ${GITHUB_REF_NAME} with Silent's ASI Loader](https://github.com/cleolibrary/CLEO5/releases/download/${GITHUB_REF_NAME}/SA.CLEO_${GITHUB_REF_NAME}+Silent_ASI_Loader.zip)`, + `#### If you prefer Ultimate ASI Loader:`, + `- Download [CLEO ${GITHUB_REF_NAME} with Ultimate ASI Loader](https://github.com/cleolibrary/CLEO5/releases/download/${GITHUB_REF_NAME}/SA.CLEO_${GITHUB_REF_NAME}+Ultimate_ASI_Loader.zip)`, + `#### If you have an ASI loader installed already:`, + `- Download [this archive](https://github.com/cleolibrary/CLEO5/releases/download/${GITHUB_REF_NAME}/SA.CLEO_${GITHUB_REF_NAME}.zip) which contains ONLY CLEO 5 library and plugins.`, + `## Installation`, + `- Unzip the archive to GTA San Andreas game directory.`, + `## Changelog`, + ]; + for (let i = 0; i < lines.length; i++) { const line = lines[i]; if (line.trimStart().startsWith("## ")) { diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d6fdf0a..546caa67 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -71,12 +71,12 @@ - **2003 ([cleo_return_fail](https://library.sannybuilder.com/#/sa/CLEO/2003))** - 'argument count' parameter of **0AB1 (cleo_call)** is now optional. 
`cleo_call @LABEL args 0` can be written as `cleo_call @LABEL` - 'argument count' parameter of **0AB2 (cleo_return)** is now optional. `cleo_return 0` can be written as `cleo_return` - - **cleo_return_\*** opcodes now can pass strings as return arguments + - **cleo_return_\*** opcodes now can pass strings as return arguments - SCM functions **(0AB1)** now keep their own GOSUB's call stack - fixed bug in **0AD4 ([scan_string](https://library.sannybuilder.com/#/sa/text/2604))** causing data overruns when reading strings longer than target variable - fixed result register not being cleared before function call in opcodes **0AA7** and **0AA8** - changes in file operations - - file paths can now use 'virtual absolute paths'. Use prefix in file path strings to access predefined locations: + - file paths can now use 'virtual absolute paths'. Use prefix in file path strings to access predefined locations: - `root:\` for _game root_ directory - `userfiles:\` for _game save files_ directory - `.\` for _this script file_ directory @@ -89,6 +89,7 @@ - updated included Silent's ASI Loader to version 1.3 ### Bug Fixes + - fixed error in **004E (terminate_this_script)** allowing to run multiple missions - fixed handling of strings longer than 128 characters causing errors in some cases - fixed error in handling of first string argument in **0AF5 (write_string to_ini_file)** @@ -98,35 +99,37 @@ - fixed invalid 7 characters length limit of **0AAA (get_script_struct_named)** #### SDK AND PLUGINS + - now all opcodes in range **0-7FFF** can be registered by plugins - plugins moved to _cleo\cleo_plugins_ directory -- new SDK method: CLEO_RegisterCommand -- new SDK method: CLEO_RegisterCallback -- new SDK method: CLEO_GetVarArgCount -- new SDK method: CLEO_PeekIntOpcodeParam -- new SDK method: CLEO_PeekFloatOpcodeParam -- new SDK method: CLEO_PeekPointerToScriptVariable -- new SDK method: CLEO_SkipUnusedVarArgs -- new SDK method: CLEO_ReadParamsFormatted -- new SDK method: 
CLEO_ReadStringParamWriteBuffer -- new SDK method: CLEO_GetOpcodeParamsArray -- new SDK method: CLEO_GetParamsHandledCount -- new SDK method: CLEO_GetScriptVersion -- new SDK method: CLEO_GetScriptInfoStr -- new SDK method: CLEO_GetScriptFilename -- new SDK method: CLEO_GetScriptWorkDir -- new SDK method: CLEO_SetScriptWorkDir -- new SDK method: CLEO_ResolvePath -- new SDK method: CLEO_ListDirectory -- new SDK method: CLEO_ListDirectoryFree -- new SDK method: CLEO_GetScriptByName -- new SDK method: CLEO_GetScriptByFilename -- new SDK method: CLEO_GetScriptDebugMode -- new SDK method: CLEO_SetScriptDebugMode -- new SDK method: CLEO_Log - +- new SDK methods: + - CLEO_RegisterCommand + - CLEO_RegisterCallback + - CLEO_GetVarArgCount + - CLEO_PeekIntOpcodeParam + - CLEO_PeekFloatOpcodeParam + - CLEO_PeekPointerToScriptVariable + - CLEO_SkipUnusedVarArgs + - CLEO_ReadParamsFormatted + - CLEO_ReadStringParamWriteBuffer + - CLEO_GetOpcodeParamsArray + - CLEO_GetParamsHandledCount + - CLEO_GetScriptVersion + - CLEO_GetScriptInfoStr + - CLEO_GetScriptFilename + - CLEO_GetScriptWorkDir + - CLEO_SetScriptWorkDir + - CLEO_ResolvePath + - CLEO_ListDirectory + - CLEO_ListDirectoryFree + - CLEO_GetScriptByName + - CLEO_GetScriptByFilename + - CLEO_GetScriptDebugMode + - CLEO_SetScriptDebugMode + - CLEO_Log #### CLEO internal + - introduced unit test scripts - project migrated to VS 2022 - configured game debugging settings @@ -135,7 +138,9 @@ - added setup_env.bat script #### Special Thanks + - **123nir** for the alpha-testing, troubleshooting and valuable bug reports ## Older + For previous changes, see [CLEO4 changelog](https://github.com/cleolibrary/CLEO4/blob/master/CHANGELOG.md) diff --git a/README.md b/README.md index e9f17645..fc26727d 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,12 @@ CLEO is a hugely popular extensible library plugin which brings new possibilitie ## Installation -CLEO requires an 'ASI Loader' installed to run which is provided with the release. 
The ASI Loader requires overwriting one original game file: vorbisFile.dll - be sure to make a backup of this file. -No additional files are replaced, however the following files and folders are added: +An ASI loader is required for CLEO 5 to work. CLEO 5 is bundled with several popular ASI Loaders ([Silent's ASI Loader](https://cookieplmonster.github.io/mods/gta-sa/#asiloader) and [Ultimate ASI Loader](https://github.com/ThirteenAG/Ultimate-ASI-Loader/)). + +Follow the instructions on the [release page](https://github.com/cleolibrary/CLEO5/releases) to choose a bundle that works best for you. + +The ASI Loader replaces one original game file: `vorbisFile.dll` - be sure to make a backup of this file. +CLEO itself does not replace any game file, however the following files and folders are added: - cleo\ (CLEO script directory) - cleo\\.config\sa.json (opcodes info file) @@ -20,8 +24,6 @@ No additional files are replaced, however the following files and folders are ad - cleo\cleo_text\ (CLEO text directory) - cleo.asi (core library) - bass.dll (audio engine library) -- vorbisFile.dll (Silent's ASI Loader) -- vorbisHooked.dll (original vorbisFile.dll file) All plugins are optional, however they may be required by various CLEO scripts. @@ -45,8 +47,6 @@ CLEO is continually being improved and extended over time. In very rare circumst The author and original developer of the CLEO library is Seemann. Development of CLEO 4 was led by Alien and Deji, later turned into CLEO 5 by Miran. Today the CLEO library is an open-source project being maintained at https://github.com/cleolibrary -The author of the ASI Loader is Silent. Find out more at: https://gtaforums.com/topic/523982-relopensrc-silents-asi-loader/ - Special thanks to: - Stanislav Golovin (a.k.a. listener) for his great work in exploration of the GTA series. 
From 0ebf3c4b14c01475b31b8c54aba1b58a45a97b9e Mon Sep 17 00:00:00 2001 From: Seemann Date: Tue, 9 Apr 2024 10:14:18 -0400 Subject: [PATCH 145/216] update test framework to latest sb (#117) --- .../cleo_tests/FilesystemOperations/0A9A.txt | 19 ++--- tests/cleo_tests/MemoryOperations/0A8C.txt | 18 ++--- tests/cleo_tests/MemoryOperations/0A8D.txt | 14 ++-- tests/cleo_tests/MemoryOperations/0A96.txt | 4 +- tests/cleo_tests/MemoryOperations/0A97.txt | 4 +- tests/cleo_tests/MemoryOperations/0A98.txt | 4 +- tests/cleo_tests/MemoryOperations/0AA4.txt | 4 +- tests/cleo_tests/MemoryOperations/0AC6.txt | 4 +- tests/cleo_tests/MemoryOperations/0AC7.txt | 4 +- tests/cleo_tests/MemoryOperations/0AC8.txt | 6 +- tests/cleo_tests/MemoryOperations/0AC9.txt | 4 +- tests/cleo_tests/MemoryOperations/0AE9.txt | 15 ++-- tests/cleo_tests/MemoryOperations/0AEA.txt | 15 ++-- tests/cleo_tests/MemoryOperations/0AEB.txt | 13 +--- tests/cleo_tests/MemoryOperations/0AEC.txt | 13 +--- tests/cleo_tests/Text/0AD3.txt | 10 +-- tests/cleo_tests/Text/0AD4.txt | 12 +-- tests/cleo_tests/Text/0ADB.txt | 4 +- tests/cleo_tests/Text/0ADE.txt | 8 +- tests/cleo_tests/Text/0ADF.txt | 4 +- tests/cleo_tests/Text/0AE0.txt | 4 +- tests/cleo_tests/Text/0AED.txt | 4 +- tests/cleo_tests/Text/2600.txt | 14 ++-- tests/cleo_tests/Text/2601.txt | 6 +- tests/cleo_tests/Text/2602.txt | 6 +- tests/cleo_tests/Text/2603.txt | 6 +- tests/cleo_tests/Text/2604.txt | 6 +- tests/cleo_tests/cleo_tester.inc | 77 ++++++++++--------- 28 files changed, 140 insertions(+), 162 deletions(-) diff --git a/tests/cleo_tests/FilesystemOperations/0A9A.txt b/tests/cleo_tests/FilesystemOperations/0A9A.txt index d0ebcf50..8416d34b 100644 --- a/tests/cleo_tests/FilesystemOperations/0A9A.txt +++ b/tests/cleo_tests/FilesystemOperations/0A9A.txt @@ -2,31 +2,24 @@ {$INCLUDE_ONCE ../cleo_tester.inc} script_name "0A9A" // open_file -debug_on - -test("0A9A (open_file)", @tests) +test("0A9A (open_file)", tests) terminate_this_custom_script function tests - 
it("should fail on a non-existing file", @test1) - it("should open existing file", @test2) + it("should fail on a non-existing file", test1) + it("should open existing file", test2) return function test1 - if - 0@ = open_file "cleo\not_a_file.txt" {mode} "r" // tested opcode - then - assert(false) - else - assert(true) - end + 0@ = open_file "cleo\\not_a_file.txt" {mode} "r" // tested opcode + assert_result_false() end function test2 if - 0@ = open_file "cleo\.cleo.log" {mode} "r" // tested opcode + 0@ = open_file "cleo\\.cleo.log" {mode} "r" // tested opcode then assert(true) close_file 0@ diff --git a/tests/cleo_tests/MemoryOperations/0A8C.txt b/tests/cleo_tests/MemoryOperations/0A8C.txt index d8b8f543..5b0a764e 100644 --- a/tests/cleo_tests/MemoryOperations/0A8C.txt +++ b/tests/cleo_tests/MemoryOperations/0A8C.txt @@ -2,21 +2,21 @@ {$INCLUDE_ONCE ../cleo_tester.inc} script_name '0A8C' -test("0A8C (write_memory)", @tests) +test("0A8C (write_memory)", tests) terminate_this_custom_script function tests before_each(@before) - it("should write 0 bytes", @test1) - it("should write 1 byte", @test2) - it("should write 2 bytes", @test3) - it("should write 3 bytes", @test4) - it("should write 4 bytes", @test5) - it("should write 5 bytes", @test6) - it("should write 7 bytes", @test7) - it("should write float", @test8) + it("should write 0 bytes", test1) + it("should write 1 byte", test2) + it("should write 2 bytes", test3) + it("should write 3 bytes", test4) + it("should write 4 bytes", test5) + it("should write 5 bytes", test6) + it("should write 7 bytes", test7) + it("should write float", test8) return :before diff --git a/tests/cleo_tests/MemoryOperations/0A8D.txt b/tests/cleo_tests/MemoryOperations/0A8D.txt index 9e2857fa..a85c3a94 100644 --- a/tests/cleo_tests/MemoryOperations/0A8D.txt +++ b/tests/cleo_tests/MemoryOperations/0A8D.txt @@ -2,18 +2,18 @@ {$INCLUDE_ONCE ../cleo_tester.inc} script_name '0A8D' -test("0A8D (read_memory)", @tests) +test("0A8D (read_memory)", 
tests) terminate_this_custom_script function tests before_each(@prepare_tests) - it("should read 0 bytes", @test1) - it("should read 1 byte", @test2) - it("should read 2 bytes", @test3) - it("should read 3 bytes", @test4) - it("should read 4 bytes", @test5) - it("should read float", @test6) + it("should read 0 bytes", test1) + it("should read 1 byte", test2) + it("should read 2 bytes", test3) + it("should read 3 bytes", test4) + it("should read 4 bytes", test5) + it("should read float", test6) return diff --git a/tests/cleo_tests/MemoryOperations/0A96.txt b/tests/cleo_tests/MemoryOperations/0A96.txt index 2cc17b0f..fb974bda 100644 --- a/tests/cleo_tests/MemoryOperations/0A96.txt +++ b/tests/cleo_tests/MemoryOperations/0A96.txt @@ -2,12 +2,12 @@ {$INCLUDE_ONCE ../cleo_tester.inc} script_name "0A96" // get_ped_pointer -test("0A96 (get_ped_pointer)", @tests) +test("0A96 (get_ped_pointer)", tests) terminate_this_custom_script function tests - it("should return valid pointer", @test1) + it("should return valid pointer", test1) return function test1 diff --git a/tests/cleo_tests/MemoryOperations/0A97.txt b/tests/cleo_tests/MemoryOperations/0A97.txt index e1472875..fdb40778 100644 --- a/tests/cleo_tests/MemoryOperations/0A97.txt +++ b/tests/cleo_tests/MemoryOperations/0A97.txt @@ -2,12 +2,12 @@ {$INCLUDE_ONCE ../cleo_tester.inc} script_name "0A97" // get_vehicle_pointer -test("0A97 (get_vehicle_pointer)", @tests) +test("0A97 (get_vehicle_pointer)", tests) terminate_this_custom_script function tests - it("should return a valid pointer", @test1) + it("should return a valid pointer", test1) return function test1 diff --git a/tests/cleo_tests/MemoryOperations/0A98.txt b/tests/cleo_tests/MemoryOperations/0A98.txt index 26c6d9ae..4df84f1f 100644 --- a/tests/cleo_tests/MemoryOperations/0A98.txt +++ b/tests/cleo_tests/MemoryOperations/0A98.txt @@ -2,12 +2,12 @@ {$INCLUDE_ONCE ../cleo_tester.inc} script_name "0A98" // get_vehicle_pointer -test("0A98 (get_object_pointer)", @tests) 
+test("0A98 (get_object_pointer)", tests) terminate_this_custom_script function tests - it("should return a valid pointer", @test1) + it("should return a valid pointer", test1) return function test1 diff --git a/tests/cleo_tests/MemoryOperations/0AA4.txt b/tests/cleo_tests/MemoryOperations/0AA4.txt index 5b9d9012..e746a223 100644 --- a/tests/cleo_tests/MemoryOperations/0AA4.txt +++ b/tests/cleo_tests/MemoryOperations/0AA4.txt @@ -2,12 +2,12 @@ {$INCLUDE_ONCE ../cleo_tester.inc} script_name "0AA4" -test("0AA4 (get_dynamic_library_procedure)", @tests) +test("0AA4 (get_dynamic_library_procedure)", tests) terminate_this_custom_script function tests - it("should return address of Sleep function from kernel32.dll", @test1) + it("should return address of Sleep function from kernel32.dll", test1) return function test1 diff --git a/tests/cleo_tests/MemoryOperations/0AC6.txt b/tests/cleo_tests/MemoryOperations/0AC6.txt index cf2585b7..3d5abb5d 100644 --- a/tests/cleo_tests/MemoryOperations/0AC6.txt +++ b/tests/cleo_tests/MemoryOperations/0AC6.txt @@ -2,12 +2,12 @@ {$INCLUDE_ONCE ../cleo_tester.inc} script_name "0AC6" // get_label_pointer -test("0AC6 (get_label_pointer)", @tests) +test("0AC6 (get_label_pointer)", tests) terminate_this_custom_script function tests - it("should return valid pointer", @test1) + it("should return valid pointer", test1) return function test1 diff --git a/tests/cleo_tests/MemoryOperations/0AC7.txt b/tests/cleo_tests/MemoryOperations/0AC7.txt index 45190382..8ebd7566 100644 --- a/tests/cleo_tests/MemoryOperations/0AC7.txt +++ b/tests/cleo_tests/MemoryOperations/0AC7.txt @@ -2,12 +2,12 @@ {$INCLUDE_ONCE ../cleo_tester.inc} script_name "0AC7" // get_var_pointer -test("0AC7 (get_var_pointer)", @tests) +test("0AC7 (get_var_pointer)", tests) terminate_this_custom_script function tests - it("should return valid pointer", @test1) + it("should return valid pointer", test1) return function test1 diff --git a/tests/cleo_tests/MemoryOperations/0AC8.txt 
b/tests/cleo_tests/MemoryOperations/0AC8.txt index 6998e06c..0ac296b3 100644 --- a/tests/cleo_tests/MemoryOperations/0AC8.txt +++ b/tests/cleo_tests/MemoryOperations/0AC8.txt @@ -2,7 +2,7 @@ {$INCLUDE_ONCE ../cleo_tester.inc} script_name "0AC8" // allocate_memory -test("0AC8 (allocate_memory)", @tests) +test("0AC8 (allocate_memory)", tests) terminate_this_custom_script function tests @@ -10,8 +10,8 @@ function tests before_each(@allocate) after_each(@free) - it("should return valid pointer", @test1) - it("should point to zero-filled mem in CLEO5", @test2) + it("should return valid pointer", test1) + it("should point to zero-filled mem in CLEO5", test2) return function test1 diff --git a/tests/cleo_tests/MemoryOperations/0AC9.txt b/tests/cleo_tests/MemoryOperations/0AC9.txt index aa822cb6..a60575f0 100644 --- a/tests/cleo_tests/MemoryOperations/0AC9.txt +++ b/tests/cleo_tests/MemoryOperations/0AC9.txt @@ -2,12 +2,12 @@ {$INCLUDE_ONCE ../cleo_tester.inc} script_name "0AC9" // free_memory -test("0AC9 (free_memory)", @tests) +test("0AC9 (free_memory)", tests) terminate_this_custom_script function tests - it("should free allocated memory", @test1) + it("should free allocated memory", test1) return function test1 diff --git a/tests/cleo_tests/MemoryOperations/0AE9.txt b/tests/cleo_tests/MemoryOperations/0AE9.txt index 9e73de1e..3b2f6322 100644 --- a/tests/cleo_tests/MemoryOperations/0AE9.txt +++ b/tests/cleo_tests/MemoryOperations/0AE9.txt @@ -2,12 +2,12 @@ {$INCLUDE_ONCE ../cleo_tester.inc} script_name "0AE9" // pop_float -test("0AE9 (pop_float)", @tests) +test("0AE9 (pop_float)", tests) terminate_this_custom_script function tests - it("should pop float from stack", @test1) + it("should pop float from stack", test1) return @@ -17,14 +17,9 @@ function tests call_function 0x0823CEE {argCount} 1 {pop} 1 {arg} 2@ // double atof(const char *) pop_float {result} 3@ - - if - 3@ == 42.5 - then - assert(true) - else - assert(false) - end + + 3@ == 42.5 + assert_result_true() end 
end diff --git a/tests/cleo_tests/MemoryOperations/0AEA.txt b/tests/cleo_tests/MemoryOperations/0AEA.txt index 5b125a4c..466f452d 100644 --- a/tests/cleo_tests/MemoryOperations/0AEA.txt +++ b/tests/cleo_tests/MemoryOperations/0AEA.txt @@ -2,24 +2,19 @@ {$INCLUDE_ONCE ../cleo_tester.inc} script_name "0AEA" // get_ped_ref -test("0AEA (get_ped_ref)", @tests) +test("0AEA (get_ped_ref)", tests) terminate_this_custom_script function tests - it("should return ped handle for pointer", @test1) + it("should return ped handle for pointer", test1) return function test1 int handle = get_player_char 0 int ptr = get_ped_pointer handle int handle2 = get_ped_ref ptr - - if - handle == handle2 - then - assert(true) - else - assert(false) - end + + handle == handle2 + assert_result_true() end end diff --git a/tests/cleo_tests/MemoryOperations/0AEB.txt b/tests/cleo_tests/MemoryOperations/0AEB.txt index f60d5929..14056bbd 100644 --- a/tests/cleo_tests/MemoryOperations/0AEB.txt +++ b/tests/cleo_tests/MemoryOperations/0AEB.txt @@ -2,11 +2,11 @@ {$INCLUDE_ONCE ../cleo_tester.inc} script_name "0AEB" // get_vehicle_ref -test("0AEB (get_vehicle_ref)", @tests) +test("0AEB (get_vehicle_ref)", tests) terminate_this_custom_script function tests - it("should return vehicle handle for pointer", @test1) + it("should return vehicle handle for pointer", test1) return function test1 @@ -17,13 +17,8 @@ function tests int handle2 = get_vehicle_ref ptr mark_car_as_no_longer_needed handle - if - handle == handle2 - then - assert(true) - else - assert(false) - end + handle == handle2 + assert_result_true() end end diff --git a/tests/cleo_tests/MemoryOperations/0AEC.txt b/tests/cleo_tests/MemoryOperations/0AEC.txt index ae56071e..7aeb0c50 100644 --- a/tests/cleo_tests/MemoryOperations/0AEC.txt +++ b/tests/cleo_tests/MemoryOperations/0AEC.txt @@ -2,11 +2,11 @@ {$INCLUDE_ONCE ../cleo_tester.inc} script_name "0AEC" // get_object_ref -test("0AEC (get_object_ref)", @tests) +test("0AEC (get_object_ref)", tests) 
terminate_this_custom_script function tests - it("should return vehicle handle for pointer", @test1) + it("should return vehicle handle for pointer", test1) return function test1 @@ -17,13 +17,8 @@ function tests int handle2 = get_object_ref ptr mark_object_as_no_longer_needed handle - if - handle == handle2 - then - assert(true) - else - assert(false) - end + handle == handle2 + assert_result_true() end end diff --git a/tests/cleo_tests/Text/0AD3.txt b/tests/cleo_tests/Text/0AD3.txt index 5c2879ea..65a4e39c 100644 --- a/tests/cleo_tests/Text/0AD3.txt +++ b/tests/cleo_tests/Text/0AD3.txt @@ -2,15 +2,15 @@ {$INCLUDE_ONCE ../cleo_tester.inc} script_name '0AD3' -test("0AD3 (string_format)", @tests) +test("0AD3 (string_format)", tests) terminate_this_custom_script function tests - it("should format string", @test1) - it("should respect short string variable size", @test2) - it("should respect long string variable size", @test3) - it("should create long text", @test4) + it("should format string", test1) + it("should respect short string variable size", test2) + it("should respect long string variable size", test3) + it("should create long text", test4) return diff --git a/tests/cleo_tests/Text/0AD4.txt b/tests/cleo_tests/Text/0AD4.txt index 24decbc2..dab7a2c1 100644 --- a/tests/cleo_tests/Text/0AD4.txt +++ b/tests/cleo_tests/Text/0AD4.txt @@ -2,16 +2,16 @@ {$INCLUDE_ONCE ../cleo_tester.inc} script_name '0AD4' -test("0AD4 (scan_string)", @tests) +test("0AD4 (scan_string)", tests) terminate_this_custom_script function tests - it("should scan numbers", @test1) - it("should scan characters", @test2) - it("should scan strings", @test3) - it("should report arg count missmatch", @test4) - it("should respect target string size", @test5) + it("should scan numbers", test1) + it("should scan characters", test2) + it("should scan strings", test3) + it("should report arg count missmatch", test4) + it("should respect target string size", test5) return function test1 diff --git 
a/tests/cleo_tests/Text/0ADB.txt b/tests/cleo_tests/Text/0ADB.txt index 5695cf41..c9119be3 100644 --- a/tests/cleo_tests/Text/0ADB.txt +++ b/tests/cleo_tests/Text/0ADB.txt @@ -2,11 +2,11 @@ {$INCLUDE_ONCE ../cleo_tester.inc} script_name '0ADB' -test("0ADB (get_name_of_vehicle_model)", @tests) +test("0ADB (get_name_of_vehicle_model)", tests) terminate_this_custom_script function tests - it("should return vehicle model name", @test1) + it("should return vehicle model name", test1) return diff --git a/tests/cleo_tests/Text/0ADE.txt b/tests/cleo_tests/Text/0ADE.txt index daf1529a..c9b1892d 100644 --- a/tests/cleo_tests/Text/0ADE.txt +++ b/tests/cleo_tests/Text/0ADE.txt @@ -2,13 +2,13 @@ {$INCLUDE_ONCE ../cleo_tester.inc} script_name '0ADE' -test("0ADE (get_text_label_string)", @tests) +test("0ADE (get_text_label_string)", tests) terminate_this_custom_script function tests - it("should return into variable", @test1) - it("should return source pointer", @test2) - it("should return empty", @test3) + it("should return into variable", test1) + it("should return source pointer", test2) + it("should return empty", test3) return function test1 diff --git a/tests/cleo_tests/Text/0ADF.txt b/tests/cleo_tests/Text/0ADF.txt index 078fb612..63e5e3d1 100644 --- a/tests/cleo_tests/Text/0ADF.txt +++ b/tests/cleo_tests/Text/0ADF.txt @@ -2,11 +2,11 @@ {$INCLUDE_ONCE ../cleo_tester.inc} script_name '0ADF' -test("0ADF (add_text_label)", @tests) +test("0ADF (add_text_label)", tests) terminate_this_custom_script function tests - it("should add dynamic GXT", @test1) + it("should add dynamic GXT", test1) return function test1 diff --git a/tests/cleo_tests/Text/0AE0.txt b/tests/cleo_tests/Text/0AE0.txt index 42abcead..f390d7b1 100644 --- a/tests/cleo_tests/Text/0AE0.txt +++ b/tests/cleo_tests/Text/0AE0.txt @@ -2,11 +2,11 @@ {$INCLUDE_ONCE ../cleo_tester.inc} script_name '0AE0' -test("0AE0 (remove_text_label)", @tests) +test("0AE0 (remove_text_label)", tests) terminate_this_custom_script 
function tests - it("should remove dynamic GXT", @test1) + it("should remove dynamic GXT", test1) return function test1 diff --git a/tests/cleo_tests/Text/0AED.txt b/tests/cleo_tests/Text/0AED.txt index bb5ae2a9..debec07f 100644 --- a/tests/cleo_tests/Text/0AED.txt +++ b/tests/cleo_tests/Text/0AED.txt @@ -2,12 +2,12 @@ {$INCLUDE_ONCE ../cleo_tester.inc} script_name '0AED' -test("0AED (string_float_format)", @tests) +test("0AED (string_float_format)", tests) terminate_this_custom_script function tests - it("should print float", @test1) + it("should print float", test1) return function test1 diff --git a/tests/cleo_tests/Text/2600.txt b/tests/cleo_tests/Text/2600.txt index a68c6f0c..76c67717 100644 --- a/tests/cleo_tests/Text/2600.txt +++ b/tests/cleo_tests/Text/2600.txt @@ -2,17 +2,17 @@ {$INCLUDE_ONCE ../cleo_tester.inc} script_name '2600' -test("2600 (is_text_empty)", @tests) +test("2600 (is_text_empty)", tests) terminate_this_custom_script function tests - it("short string should be empty", @test1) - it("short string should NOT be empty", @test2) - it("long string should be empty", @test3) - it("long string should NOT be empty", @test4) - it("buffer string should be empty", @test5) - it("buffer string should NOT be empty", @test6) + it("short string should be empty", test1) + it("short string should NOT be empty", test2) + it("long string should be empty", test3) + it("long string should NOT be empty", test4) + it("buffer string should be empty", test5) + it("buffer string should NOT be empty", test6) return function test1 diff --git a/tests/cleo_tests/Text/2601.txt b/tests/cleo_tests/Text/2601.txt index 990e00ea..e1806acf 100644 --- a/tests/cleo_tests/Text/2601.txt +++ b/tests/cleo_tests/Text/2601.txt @@ -2,7 +2,7 @@ {$INCLUDE_ONCE ../cleo_tester.inc} script_name '2601' -test("2601 (is_text_equal)", @tests) +test("2601 (is_text_equal)", tests) terminate_this_custom_script @@ -10,8 +10,8 @@ function tests before_each(@prepare_tests) after_each(@cleanup_tests) - 
it("should texts be equal", @test1) - it("should texts be NOT equal", @test2) + it("should texts be equal", test1) + it("should texts be NOT equal", test2) return :prepare_tests diff --git a/tests/cleo_tests/Text/2602.txt b/tests/cleo_tests/Text/2602.txt index f7640c50..a8f06ca7 100644 --- a/tests/cleo_tests/Text/2602.txt +++ b/tests/cleo_tests/Text/2602.txt @@ -2,13 +2,13 @@ {$INCLUDE_ONCE ../cleo_tester.inc} script_name '2602' -test("2602 (is_text_in_text)", @tests) +test("2602 (is_text_in_text)", tests) terminate_this_custom_script function tests - it("should contain sub-text", @test1) - it("should NOT contain sub-text", @test2) + it("should contain sub-text", test1) + it("should NOT contain sub-text", test2) return function test1 diff --git a/tests/cleo_tests/Text/2603.txt b/tests/cleo_tests/Text/2603.txt index 0c7ee4b8..6d345c6a 100644 --- a/tests/cleo_tests/Text/2603.txt +++ b/tests/cleo_tests/Text/2603.txt @@ -2,13 +2,13 @@ {$INCLUDE_ONCE ../cleo_tester.inc} script_name '2603' -test("2603 (is_text_prefix)", @tests) +test("2603 (is_text_prefix)", tests) terminate_this_custom_script function tests - it("should contain text prefix", @test1) - it("should NOT contain text prefix", @test2) + it("should contain text prefix", test1) + it("should NOT contain text prefix", test2) return function test1 diff --git a/tests/cleo_tests/Text/2604.txt b/tests/cleo_tests/Text/2604.txt index b82477fc..c8d75478 100644 --- a/tests/cleo_tests/Text/2604.txt +++ b/tests/cleo_tests/Text/2604.txt @@ -2,13 +2,13 @@ {$INCLUDE_ONCE ../cleo_tester.inc} script_name '2604' -test("2604 (is_text_suffix)", @tests) +test("2604 (is_text_suffix)", tests) terminate_this_custom_script function tests - it("should contain text suffix", @test1) - it("should NOT contain text suffix", @test2) + it("should contain text suffix", test1) + it("should NOT contain text suffix", test2) return function test1 diff --git a/tests/cleo_tests/cleo_tester.inc b/tests/cleo_tests/cleo_tester.inc index 
382ff5bb..7776815a 100644 --- a/tests/cleo_tests/cleo_tester.inc +++ b/tests/cleo_tests/cleo_tester.inc @@ -11,7 +11,7 @@ function _cleo_tester_read_var(index: int): int int buf = get_label_pointer @_cleo_tester_shared_vars index *= 4 int value = read_memory_with_offset {address} buf {offset} index {size} 4 - return true value + return value end function _cleo_tester_write_var(index: int, value: int) @@ -22,7 +22,7 @@ end /// registers new test suite (collection of unit tests) /// use it(...) for individual unit tests -function test(suite_name: integer, callback: int) +function test(suite_name: string, callback: int) debug_on int suite_name_buf = get_label_pointer @_cleo_tester_test_name @@ -37,10 +37,7 @@ end /// registers new unit test in a test suite /// use assert_*(...) to validate result -function it(spec_name: integer, callback: int) - define function run_spec - define function inject_offset(label: int, offset: int) - +function it(spec_name: string, callback: int) int index = _cleo_tester_read_var(VAR_TEST_INDEX) int spec_name_buf = get_label_pointer @_cleo_tester_spec_name @@ -52,7 +49,7 @@ function it(spec_name: integer, callback: int) _cleo_tester_write_var(VAR_SPEC, callback) _cleo_tester_write_var(VAR_ASSERT_INDEX, 0) - run_spec + run_spec() //trace "~g~~h~~h~Test #%d PASSED" index index++ @@ -101,6 +98,14 @@ hex 00(256) end +/// skips unit test in a test suite +function xit(spec_name: string, callback: int) + int index = _cleo_tester_read_var(VAR_TEST_INDEX) + trace "Skipping test #%d %s" index spec_name + index++ + _cleo_tester_write_var(VAR_TEST_INDEX, index) +end + function _cleo_tester_fail int test_index = _cleo_tester_read_var(VAR_TEST_INDEX) int test_name = get_label_pointer @_cleo_tester_spec_name @@ -116,11 +121,11 @@ end /// checks if bool value is true (different than 0) function assert_true(flag: int) - _cleo_tester_increment_assert + _cleo_tester_increment_assert() if flag == false then - _cleo_tester_fail + _cleo_tester_fail() trace "TRUE 
expected~n~%d occured" flag breakpoint terminate_this_custom_script @@ -129,11 +134,11 @@ end /// checks if bool value is false function assert_false(flag: int) - _cleo_tester_increment_assert + _cleo_tester_increment_assert() if flag <> false then - _cleo_tester_fail + _cleo_tester_fail() trace "FALSE expected~n~%d occured" flag breakpoint terminate_this_custom_script @@ -143,12 +148,12 @@ end /// checks if condition result value is true :assert_result_true goto_if_false @_assert_result_true - _cleo_tester_increment_assert + _cleo_tester_increment_assert() return :_assert_result_true - _cleo_tester_increment_assert - _cleo_tester_fail + _cleo_tester_increment_assert() + _cleo_tester_fail() trace "Condition result is FALSE, expected TRUE" breakpoint terminate_this_custom_script @@ -158,23 +163,23 @@ return :assert_result_false goto_if_false @_assert_result_false - _cleo_tester_increment_assert - _cleo_tester_fail + _cleo_tester_increment_assert() + _cleo_tester_fail() trace "Condition result is TRUE, expected FALSE" breakpoint terminate_this_custom_script :_assert_result_false - _cleo_tester_increment_assert + _cleo_tester_increment_assert() return /// checks if two int values are equal, otherwise stops the test execution function assert_eq(actual: int, expected: int) - _cleo_tester_increment_assert + _cleo_tester_increment_assert() if actual <> expected then - _cleo_tester_fail + _cleo_tester_fail() trace "%08X expected~n~%08X occured" expected actual breakpoint terminate_this_custom_script @@ -186,7 +191,7 @@ function assert_neq(actual: int, expected: int) if actual == expected then - _cleo_tester_fail + _cleo_tester_fail() trace "Expected value different than %08X" actual breakpoint terminate_this_custom_script @@ -194,12 +199,12 @@ function assert_neq(actual: int, expected: int) end /// checks if two float values are equal, otherwise stops the test execution -function assert_eqf(actual:float, expected:float) - _cleo_tester_increment_assert +function 
assert_eqf(actual: float, expected: float) + _cleo_tester_increment_assert() if actual <> expected then - _cleo_tester_fail + _cleo_tester_fail() trace "%f expected~n~%f occured" expected actual breakpoint terminate_this_custom_script @@ -207,12 +212,12 @@ function assert_eqf(actual:float, expected:float) end /// checks if two float values are not equal, otherwise stops the test execution -function assert_neqf(actual:float, expected:float) - _cleo_tester_increment_assert +function assert_neqf(actual: float, expected: float) + _cleo_tester_increment_assert() if actual == expected then - _cleo_tester_fail + _cleo_tester_fail() trace "Expected value different than %f" actual breakpoint terminate_this_custom_script @@ -221,11 +226,11 @@ end /// checks if value is a valid pointer, otherwise stops the test execution function assert_ptr(ptr: int) - _cleo_tester_increment_assert + _cleo_tester_increment_assert() if ptr <= 0x10000 // possibly valid pointer then - _cleo_tester_fail + _cleo_tester_fail() trace "%08X is not valid pointer" ptr breakpoint terminate_this_custom_script @@ -234,23 +239,23 @@ end /// checks if value is not 0, otherwise stops the test execution function assert(flag: int) - _cleo_tester_increment_assert + _cleo_tester_increment_assert() if flag == 0 then - _cleo_tester_fail + _cleo_tester_fail() breakpoint terminate_this_custom_script end end /// checks if two string values are equal, otherwise stops the test execution -function assert_eqs(actual:string, expected:string) - _cleo_tester_increment_assert +function assert_eqs(actual: string, expected: string) + _cleo_tester_increment_assert() if not is_text_equal {text} actual {another} expected {ignoreCase} false then - _cleo_tester_fail + _cleo_tester_fail() trace "`%s` expected~n~`%s` occured" expected actual breakpoint terminate_this_custom_script @@ -258,12 +263,12 @@ function assert_eqs(actual:string, expected:string) end /// checks if two string values are not equal, otherwise stops the test 
execution -function assert_neqs(actual:string, expected:string) - _cleo_tester_increment_assert +function assert_neqs(actual: string, expected: string) + _cleo_tester_increment_assert() if is_text_equal {text} actual {another} expected {ignoreCase} false then - _cleo_tester_fail + _cleo_tester_fail() trace "Expected value different than `%s`" actual breakpoint terminate_this_custom_script From 95dea91427181cd9f04a22b6e99fd2b454bc5d6f Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Fri, 12 Apr 2024 00:55:03 +0200 Subject: [PATCH 146/216] Added info about compatibility mode to some errors. (#122) --- cleo_sdk/CLEO_Utils.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cleo_sdk/CLEO_Utils.h b/cleo_sdk/CLEO_Utils.h index 239b6f0f..a978fbe5 100644 --- a/cleo_sdk/CLEO_Utils.h +++ b/cleo_sdk/CLEO_Utils.h @@ -487,7 +487,7 @@ namespace CLEO if (!_paramWasInt()) { SHOW_ERROR("Input argument %s expected to be integer, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_READ_PARAM_FLOAT() _readParamFloat(thread).fParam; \ - if (!IsLegacyScript(thread) && !_paramWasFloat()) { SHOW_ERROR("Input argument %s expected to be float, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!IsLegacyScript(thread) && !_paramWasFloat()) { SHOW_ERROR("Input argument %s expected to be float, got %s in script %s\nScript suspended.\n\nTo ignore this error, change the file extension from .cs to .cs4 and restart the game.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_READ_PARAM_ANY32() _readParam(thread).dwParam; \ if (!_paramWasInt() && !_paramWasFloat()) { SHOW_ERROR("Input 
argument %s expected to be int or float, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } @@ -528,7 +528,7 @@ namespace CLEO #define OPCODE_READ_PARAM_OUTPUT_VAR_FLOAT() (float*)_readParamVariable(thread); \ if (!_paramWasVariable()) { SHOW_ERROR("Output argument %s expected to be variable float, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ - if (!IsLegacyScript(thread) && !_paramWasFloat(true)) { SHOW_ERROR("Output argument %s expected to be variable float, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!IsLegacyScript(thread) && !_paramWasFloat(true)) { SHOW_ERROR("Output argument %s expected to be variable float, got %s in script %s\nScript suspended.\n\nTo ignore this error, change the file extension from .cs to .cs4 and restart the game.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_READ_PARAM_OUTPUT_VAR_STRING() _readParamStringInfo(thread); \ if (!_paramWasString(true)) { SHOW_ERROR("Output argument %s expected to be variable string, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } @@ -561,7 +561,7 @@ namespace CLEO if (!_paramWasInt(true) && !_paramWasFloat(true)) { SHOW_ERROR("Output argument %s expected to be int or float variable, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return 
thread->Suspend(); } #define OPCODE_WRITE_PARAM_FLOAT(_value) _writeParam(thread, _value); \ - if (!IsLegacyScript(thread) && !_paramWasFloat(true)) { SHOW_ERROR("Output argument %s expected to be variable float, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if (!IsLegacyScript(thread) && !_paramWasFloat(true)) { SHOW_ERROR("Output argument %s expected to be variable float, got %s in script %s\nScript suspended.\n\nTo ignore this error, change the file extension from .cs to .cs4 and restart the game.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_WRITE_PARAM_STRING(_value) if(!_writeParamText(thread, _value)) { return OpcodeResult::OR_INTERRUPT; } From 8acb334302bb663925772db83e395906d810f7b3 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sun, 14 Apr 2024 01:48:31 +0200 Subject: [PATCH 147/216] Enabled support of export index as argument in get_proc_address opcode (#124) Enabled support of export index as argument in get_proc_address opcode --- .../MemoryOperations/MemoryOperations.cpp | 19 +++++++++++++++---- tests/cleo_tests/MemoryOperations/0AA4.txt | 15 +++++++++++++++ 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/cleo_plugins/MemoryOperations/MemoryOperations.cpp b/cleo_plugins/MemoryOperations/MemoryOperations.cpp index 03335792..1e0f5d74 100644 --- a/cleo_plugins/MemoryOperations/MemoryOperations.cpp +++ b/cleo_plugins/MemoryOperations/MemoryOperations.cpp @@ -396,12 +396,23 @@ class MemoryOperations //0AA4=3, get_proc_address %1d% library %2d% result %3d% // IF and SET static OpcodeResult __stdcall opcode_0AA4(CLEO::CRunningScript* thread) { - OPCODE_READ_PARAM_STRING(name); - auto ptr = (HMODULE)OPCODE_READ_PARAM_PTR(); + void* funcPtr = nullptr; - // allow any pointer, not just from 0AA2 + 
auto paramType = thread->PeekDataType(); + if (IsImmInteger(paramType) || IsVariable(paramType)) + { + auto procedure = OPCODE_READ_PARAM_UINT(); // text pointer or export index - see GetProcAddress docs + auto module = (HMODULE)OPCODE_READ_PARAM_PTR(); - auto funcPtr = (void*)GetProcAddress(ptr, name); + funcPtr = (void*)GetProcAddress(module, (LPCSTR)procedure); + } + else + { + OPCODE_READ_PARAM_STRING(name); + auto module = (HMODULE)OPCODE_READ_PARAM_PTR(); + + funcPtr = (void*)GetProcAddress(module, name); + } OPCODE_WRITE_PARAM_PTR(funcPtr); OPCODE_CONDITION_RESULT(funcPtr != nullptr); diff --git a/tests/cleo_tests/MemoryOperations/0AA4.txt b/tests/cleo_tests/MemoryOperations/0AA4.txt index e746a223..9d3ca25e 100644 --- a/tests/cleo_tests/MemoryOperations/0AA4.txt +++ b/tests/cleo_tests/MemoryOperations/0AA4.txt @@ -8,6 +8,7 @@ terminate_this_custom_script function tests it("should return address of Sleep function from kernel32.dll", test1) + it("should get export by index", test2) return function test1 @@ -24,4 +25,18 @@ function tests assert(false) end end + + function test2 + int load_library_addr = read_memory 0x858070 4 false + + int kernel_dll_addr = call_function_return {address} load_library_addr {numParams} 1 {pop} 0 {funcParams} "kernel32.dll" // tested opcode + if + // lib address can be any valid pointer, not necessarily one loaded with 0AA2 opcode + int sleep_addr = get_dynamic_library_procedure {procName} 1 {DynamicLibrary} kernel_dll_addr + then + assert(true) + else + assert(false) + end + end end From c36999c0f60eb6f54b7f26d00ea96b60ca638e2a Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Wed, 17 Apr 2024 02:46:55 +0200 Subject: [PATCH 148/216] Implementing cleo_arg_count opcode (#125) * Implementing cleo_arg_count opcode * Updated opcode name. Added unit test. 
--- CHANGELOG.md | 1 + source/CCustomOpcodeSystem.cpp | 31 +++++++++++++++++++++++-------- source/ScmFunction.h | 3 +++ tests/cleo_tests/Cleo/2000.txt | 34 ++++++++++++++++++++++++++++++++++ 4 files changed, 61 insertions(+), 8 deletions(-) create mode 100644 tests/cleo_tests/Cleo/2000.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index 546caa67..bf3b071b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -67,6 +67,7 @@ - implemented support for **memory pointer string** arguments for all game's native opcodes - **0B1E ([sign_extend](https://library.sannybuilder.com/#/sa/bitwise/0B1E))** - **0DD5 ([get_game_platform](https://library.sannybuilder.com/#/sa/CLEO/0DD5))** + - **2000 ([get_cleo_arg_count](https://library.sannybuilder.com/#/sa/CLEO/2000))** - **2002 ([cleo_return_with](https://library.sannybuilder.com/#/sa/CLEO/2002))** - **2003 ([cleo_return_fail](https://library.sannybuilder.com/#/sa/CLEO/2003))** - 'argument count' parameter of **0AB1 (cleo_call)** is now optional. `cleo_call @LABEL args 0` can be written as `cleo_call @LABEL` diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index db863379..b7d797ec 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -56,7 +56,8 @@ namespace CLEO OpcodeResult __stdcall opcode_0AE3(CRunningScript* thread); // get_random_object_in_sphere_no_save_recursive OpcodeResult __stdcall opcode_0DD5(CRunningScript* thread); // get_platform - // 2000 free slot + + OpcodeResult __stdcall opcode_2000(CRunningScript* thread); // get_cleo_arg_count // 2001 free slot OpcodeResult __stdcall opcode_2002(CRunningScript* thread); // cleo_return_with OpcodeResult __stdcall opcode_2003(CRunningScript* thread); // cleo_return_fail @@ -247,7 +248,8 @@ namespace CLEO CLEO_RegisterOpcode(0x0DD5, opcode_0DD5); // get_platform - // 2000, 2001 free + CLEO_RegisterOpcode(0x2000, opcode_2000); // get_cleo_arg_count + // 2001 free CLEO_RegisterOpcode(0x2002, opcode_2002); // cleo_return_with 
CLEO_RegisterOpcode(0x2003, opcode_2003); // cleo_return_fail } @@ -1042,6 +1044,7 @@ namespace CLEO return thread->Suspend(); } } + scmFunc->callArgCount = (BYTE)nParams; static SCRIPT_VAR arguments[32]; SCRIPT_VAR* locals = thread->IsMission() ? missionLocals : thread->GetVarPtr(); @@ -1049,11 +1052,11 @@ namespace CLEO SCRIPT_VAR* storedLocals = scmFunc->savedTls; // collect arguments - for (DWORD i = 0; i < min(nParams, 32); i++) + for (DWORD i = 0; i < nParams; i++) { SCRIPT_VAR* arg = arguments + i; - auto paramType = (eDataType)*thread->GetBytePointer(); + auto paramType = thread->PeekDataType(); if (IsImmInteger(paramType) || IsVariable(paramType)) { *thread >> arg->dwParam; @@ -1079,10 +1082,6 @@ namespace CLEO } } - // skip unused args - if (nParams > 32) - GetScriptParams(thread, nParams - 32); - // all arguments read scmFunc->retnAddress = thread->GetBytePointer(); @@ -1520,6 +1519,22 @@ namespace CLEO return OR_CONTINUE; } + //2000=1, %1d% = get_cleo_arg_count + OpcodeResult __stdcall opcode_2000(CRunningScript* thread) + { + auto cs = reinterpret_cast(thread); + + ScmFunction* scmFunc = ScmFunction::Get(cs->GetScmFunction()); + if (scmFunc == nullptr) + { + SHOW_ERROR("Quering argument count without preceding CLEO function call in script %s\nScript suspended.", cs->GetInfoStr().c_str()); + return thread->Suspend(); + } + + OPCODE_WRITE_PARAM_INT(scmFunc->callArgCount); + return OR_CONTINUE; + } + //2002=-1, cleo_return_with ... 
OpcodeResult __stdcall opcode_2002(CRunningScript* thread) { diff --git a/source/ScmFunction.h b/source/ScmFunction.h index 31570552..3a7988d2 100644 --- a/source/ScmFunction.h +++ b/source/ScmFunction.h @@ -17,6 +17,9 @@ namespace CLEO static void Clear(); unsigned short prevScmFunctionId, thisScmFunctionId; + BYTE callArgCount = 0; // args provided to cleo_call + + // saved nesting context state void* savedBaseIP; BYTE* retnAddress; BYTE* savedStack[8]; // gosub stack diff --git a/tests/cleo_tests/Cleo/2000.txt b/tests/cleo_tests/Cleo/2000.txt new file mode 100644 index 00000000..807de839 --- /dev/null +++ b/tests/cleo_tests/Cleo/2000.txt @@ -0,0 +1,34 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name '2000' +test("2000 (get_cleo_arg_count)", tests) +terminate_this_custom_script + + +:return_arg_count + 0@ = get_cleo_arg_count +cleo_return 1 0@ + + +function tests + it("should return cleo call arguments count", test1) + return + + function test1 + cleo_call @return_arg_count {numParams} 0 {params} {result} 0@ + assert_eq(0@, 0) + + cleo_call @return_arg_count {numParams} 1 {params} 123 {result} 0@ + assert_eq(0@, 1) + + cleo_call @return_arg_count {numParams} 2 {params} 123 0@ {result} 0@ + assert_eq(0@, 2) + + cleo_call @return_arg_count {numParams} 3 {params} 123 0@ "some_text" {result} 0@ + assert_eq(0@, 3) + + cleo_call @return_arg_count {numParams} 25 {params} 123 0@ "some_text" 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 {result} 0@ + assert_eq(0@, 25) + end +end From 352f25cd3ea3233bc040bf2a0ea025a641a9ee42 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Wed, 17 Apr 2024 06:13:28 +0200 Subject: [PATCH 149/216] More detailed error message box for invalid input string params.
(#126) --- cleo_sdk/CLEO_Utils.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/cleo_sdk/CLEO_Utils.h b/cleo_sdk/CLEO_Utils.h index a978fbe5..98c41519 100644 --- a/cleo_sdk/CLEO_Utils.h +++ b/cleo_sdk/CLEO_Utils.h @@ -378,9 +378,20 @@ namespace CLEO auto str = CLEO_ReadStringPointerOpcodeParam(thread, buffer, bufferSize); // returns pointer to source data whenever possible - if (str == nullptr) // other error? + if (str == nullptr) // reading string failed { - SHOW_ERROR("Invalid input argument %s in script %s\nScript suspended.", GetParamInfo().c_str(), ScriptInfoStr(thread).c_str()); + auto isVariableInt = IsVariable(_lastParamType) && (_lastParamArrayType == eArrayDataType::ADT_NONE || _lastParamArrayType == eArrayDataType::ADT_INT); + if ((IsImmInteger(_lastParamType) || isVariableInt) && // pointer argument type? + CLEO_GetOpcodeParamsArray()->dwParam <= MinValidAddress) + { + SHOW_ERROR("Invalid '0x%X' pointer of input string argument %s in script %s", CLEO_GetOpcodeParamsArray()->dwParam, GetParamInfo().c_str(), ScriptInfoStr(thread).c_str()); + } + else + { + // other error + SHOW_ERROR("Invalid input argument %s in script %s\nScript suspended.", GetParamInfo().c_str(), ScriptInfoStr(thread).c_str()); + } + thread->Suspend(); _lastParamType = DT_INVALID; // mark error return nullptr; From fce8a8f2d9c49bb378e7b1a012d1e923be9a16eb Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sat, 20 Apr 2024 20:42:20 +0200 Subject: [PATCH 150/216] Ignoring unsupported commands when loading opcodes database. 
(#128) --- source/OpcodeInfoDatabase.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/source/OpcodeInfoDatabase.cpp b/source/OpcodeInfoDatabase.cpp index ee42d77a..335fc3fa 100644 --- a/source/OpcodeInfoDatabase.cpp +++ b/source/OpcodeInfoDatabase.cpp @@ -78,6 +78,16 @@ bool OpcodeInfoDatabase::_Load(const std::string filepath) continue; // invalid command } + auto attributes = c["attrs"]; + if (attributes.JSONType() == JSON::Class::Object) + { + auto unsupported = attributes["is_unsupported"]; + if (unsupported.JSONType() == JSON::Class::Boolean && unsupported.ToBool()) + { + continue; // command defined as unsupported + } + } + auto idLong = stoul(commandId.ToString(), nullptr, 16); if (idLong > 0x7FFF) { From 42dcc5f292b5e362442995f263c47e9162a70cb7 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Fri, 26 Apr 2024 16:18:54 +0200 Subject: [PATCH 151/216] Fixed mission code offsets info in error messages. (#130) * Fixed mission code offsets info in error messages. * fixup! Fixed mission code offsets info in error messages. 
--- CLEO5.vcxproj | 4 ++++ CLEO5.vcxproj.filters | 3 +++ source/CCustomOpcodeSystem.cpp | 9 +++++++++ source/CCustomOpcodeSystem.h | 1 + source/CScriptEngine.cpp | 28 +++++++++++++++++++++++----- source/CScriptEngine.h | 1 + 6 files changed, 41 insertions(+), 5 deletions(-) diff --git a/CLEO5.vcxproj b/CLEO5.vcxproj index 5dc3855e..6b9583e9 100644 --- a/CLEO5.vcxproj +++ b/CLEO5.vcxproj @@ -67,6 +67,10 @@ NotUsing NotUsing + + NotUsing + NotUsing + diff --git a/CLEO5.vcxproj.filters b/CLEO5.vcxproj.filters index a6b4f839..31b955c1 100644 --- a/CLEO5.vcxproj.filters +++ b/CLEO5.vcxproj.filters @@ -105,6 +105,9 @@ source\utils + + plugin_sdk + diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index b7d797ec..9cdeb8c4 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -24,6 +24,7 @@ namespace CLEO OpcodeResult __stdcall opcode_0051(CRunningScript * thread); // GOSUB return + OpcodeResult __stdcall opcode_0417(CRunningScript* thread); // load_and_launch_mission_internal OpcodeResult __stdcall opcode_0A92(CRunningScript* thread); // stream_custom_script OpcodeResult __stdcall opcode_0A93(CRunningScript* thread); // terminate_this_custom_script @@ -219,6 +220,7 @@ namespace CLEO TRACE("Initializing CLEO core opcodes..."); CLEO_RegisterOpcode(0x0051, opcode_0051); + CLEO_RegisterOpcode(0x0417, opcode_0417); CLEO_RegisterOpcode(0x0A92, opcode_0A92); CLEO_RegisterOpcode(0x0A93, opcode_0A93); CLEO_RegisterOpcode(0x0A94, opcode_0A94); @@ -842,6 +844,13 @@ namespace CLEO return originalOpcodeHandlers[tableIdx](thread, 0x0051); // call game's original } + OpcodeResult __stdcall CCustomOpcodeSystem::opcode_0417(CRunningScript* thread) // load_and_launch_mission_internal + { + MissionIndex = CLEO_PeekIntOpcodeParam(thread); + size_t tableIdx = 0x0417 / 100; // 100 opcodes peer handler table + return originalOpcodeHandlers[tableIdx](thread, 0x0417); // call game's original + } + //0A92=-1,create_custom_thread %1d% 
OpcodeResult __stdcall opcode_0A92(CRunningScript *thread) { diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index 1197aaf9..ee77a6cf 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -45,6 +45,7 @@ namespace CLEO static OpcodeResult CleoReturnGeneric(WORD opcode, CRunningScript* thread, bool returnArgs = false, DWORD returnArgCount = 0, bool strictArgCount = true); static OpcodeResult __stdcall opcode_0051(CRunningScript* thread); // GOSUB's return + static OpcodeResult __stdcall opcode_0417(CRunningScript* thread); // load_and_launch_mission_internal private: typedef OpcodeResult(__thiscall* _OpcodeHandler)(CRunningScript* thread, WORD opcode); diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index f7571ae0..2016096f 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -2,6 +2,7 @@ #include "CleoBase.h" #include "CFileMgr.h" #include "CGame.h" +#include #include @@ -298,6 +299,7 @@ namespace CLEO BYTE *scmBlock; BYTE *MissionLoaded; BYTE *missionBlock; + int MissionIndex; BOOL *onMissionFlag; CTexture *scriptSprites; BYTE *scriptDraws; @@ -783,12 +785,27 @@ namespace CLEO } else { - auto address = (DWORD)BaseIP; - if (address == 0) address = GetInstance().VersionManager.TranslateMemoryAddress(MA_SCM_BLOCK); - //address = (DWORD)CurrentIP - address; // processed position - address = (DWORD)CCustomOpcodeSystem::lastOpcodePtr - address; // opcode position + auto base = (DWORD)BaseIP; + if (base == 0) base = (DWORD)scmBlock; + auto currPos = (DWORD)CCustomOpcodeSystem::lastOpcodePtr; - ss << "offset {" << address << "}"; // Sanny offsets style + if (IsMission() && !IsCustom()) + { + if (currPos >= (DWORD)missionBlock) + { + // we are in mission code buffer + // native missions are loaded from script file into mission block area + currPos += ((DWORD*)CTheScripts::MultiScriptArray)[MissionIndex]; // start offset of this mission within source script file + } + else + { + base = 
(DWORD)scmBlock; // seems that mission uses main scm code + } + } + + auto offset = currPos - base; + + ss << "offset {" << offset << "}"; // Sanny offsets style ss << " - "; ss << std::hex << std::uppercase << std::setw(4) << std::setfill('0') << CCustomOpcodeSystem::lastOpcode; @@ -1524,6 +1541,7 @@ namespace CLEO if (*MissionLoaded) throw std::logic_error("Starting of custom mission when other mission loaded"); *MissionLoaded = 1; + MissionIndex = -1; BaseIP = CurrentIP = missionBlock; } else { diff --git a/source/CScriptEngine.h b/source/CScriptEngine.h index 07efc805..72283554 100644 --- a/source/CScriptEngine.h +++ b/source/CScriptEngine.h @@ -168,6 +168,7 @@ namespace CLEO } extern BYTE *scmBlock, *missionBlock; + extern int MissionIndex; extern float VectorSqrMagnitude(CVector vector); } From d710d0ada86ebd00c36b97ef7e2c567a228348f4 Mon Sep 17 00:00:00 2001 From: Seemann Date: Fri, 26 Apr 2024 10:49:03 -0400 Subject: [PATCH 152/216] legacy behavior for passing string variables into functions (#131) --- .../MemoryOperations/MemoryOperations.cpp | 31 ++++++++++++++----- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/cleo_plugins/MemoryOperations/MemoryOperations.cpp b/cleo_plugins/MemoryOperations/MemoryOperations.cpp index 1e0f5d74..8d440bd6 100644 --- a/cleo_plugins/MemoryOperations/MemoryOperations.cpp +++ b/cleo_plugins/MemoryOperations/MemoryOperations.cpp @@ -114,16 +114,31 @@ class MemoryOperations auto paramType = thread->PeekDataType(); if (IsImmString(paramType) || IsVarString(paramType)) { - if (currTextParam >= Max_Text_Params) + + if (IsLegacyScript(thread) && IsVarString(paramType)) + { + /* + Preserving behavior of CLEO 4 where string variables were always passed as pointers. 
+ It allowed for neat tricks like: + 0@ = 0 + call_function 0x12345678 num_params 3 pop 0 0@v // pass pointer to 0@ + // read result from 0@ + */ + param.pParam = CLEO_GetPointerToScriptVariable(thread); + } + else { - SHOW_ERROR("Provided more (%d) than supported (%d) string arguments in script %s\nScript suspended.", currTextParam + 1, Max_Text_Params, CLEO::ScriptInfoStr(thread).c_str()); - return thread->Suspend(); + if (currTextParam >= Max_Text_Params) + { + SHOW_ERROR("Provided more (%d) than supported (%d) string arguments in script %s\nScript suspended.", currTextParam + 1, Max_Text_Params, CLEO::ScriptInfoStr(thread).c_str()); + return thread->Suspend(); + } + + OPCODE_READ_PARAM_STRING_LEN(str, MAX_STR_LEN); + strcpy(textParams[currTextParam], str); + param.pcParam = textParams[currTextParam]; + currTextParam++; } - - OPCODE_READ_PARAM_STRING_LEN(str, MAX_STR_LEN); - strcpy(textParams[currTextParam], str); - param.pcParam = textParams[currTextParam]; - currTextParam++; } else if (IsImmInteger(paramType) || IsImmFloat(paramType) || IsVariable(paramType)) { From 49c0d36500fb4ba72cebc990fb35291b7495ca08 Mon Sep 17 00:00:00 2001 From: Seemann Date: Tue, 30 Apr 2024 09:53:46 -0400 Subject: [PATCH 153/216] use LTS node (#132) --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 7092a94c..08fae600 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -19,7 +19,7 @@ jobs: - uses: actions/setup-node@v4 with: - node-version: latest + node-version: lts/* - name: Read Version Tag id: read_version From 9f5fec74e41628fbbec37fbc4b06c62aa73a26f6 Mon Sep 17 00:00:00 2001 From: Seemann Date: Wed, 1 May 2024 01:51:27 -0400 Subject: [PATCH 154/216] Update CHANGELOG.md (#134) --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bf3b071b..be616186 100644 --- a/CHANGELOG.md +++ 
b/CHANGELOG.md @@ -72,7 +72,7 @@ - **2003 ([cleo_return_fail](https://library.sannybuilder.com/#/sa/CLEO/2003))** - 'argument count' parameter of **0AB1 (cleo_call)** is now optional. `cleo_call @LABEL args 0` can be written as `cleo_call @LABEL` - 'argument count' parameter of **0AB2 (cleo_return)** is now optional. `cleo_return 0` can be written as `cleo_return` - - **cleo*return*\*** opcodes now can pass strings as return arguments + - SCM functions can return string literals and string variables - SCM functions **(0AB1)** now keep their own GOSUB's call stack - fixed bug in **0AD4 ([scan_string](https://library.sannybuilder.com/#/sa/text/2604))** causing data overruns when reading strings longer than target variable - fixed result register not being cleared before function call in opcodes **0AA7** and **0AA8** From b4d6598d07ca541d2943ab57f2deaa67ca300ac1 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Wed, 1 May 2024 22:27:17 +0200 Subject: [PATCH 155/216] Fixed parsing of module-export identifier string. 
(#135) --- source/CCustomOpcodeSystem.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 9cdeb8c4..5390d688 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -1001,8 +1001,8 @@ namespace CLEO SHOW_ERROR("Invalid module reference '%s' in opcode [0AB1] in script %s \nScript suspended.", moduleTxt.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return thread->Suspend(); } - std::string_view strExport = moduleTxt.substr(0, pos); - std::string_view strModule = moduleTxt.substr(pos + 1); + auto strExport = std::string_view(moduleTxt.data(), pos); + auto strModule = std::string_view(moduleTxt.data() + pos + 1); // get module's file absolute path auto modulePath = std::string(strModule); From 50df696aa9502666f99b2cb4c01f0f0a6032e4ee Mon Sep 17 00:00:00 2001 From: Seemann Date: Mon, 3 Jun 2024 20:50:53 -0400 Subject: [PATCH 156/216] add code sign workflow --- .github/workflows/main.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 08fae600..25b3633c 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -32,6 +32,15 @@ jobs: msbuild -m CLEO5.sln /property:Configuration=Release /property:Platform=GTASA msbuild -m cleo_plugins/CLEO_Plugins.sln /property:Configuration=Release /property:Platform=x86 + - uses: x87/code-sign-action@develop + with: + certificate: '${{ secrets.DIG_KEY_CERT }}' + password: '${{ secrets.DIG_KEY_PWD }}' + certificatename: 'Seemann' + description: 'CLEO 5' + timestampUrl: 'http://timestamp.digicert.com' + filename: './.output/Release/cleo.asi' + - name: Prepare Base Files id: prepare_archive shell: cmd From 302843fd94863a62b2d342eaacadb4a8acd703e5 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Fri, 7 Jun 2024 14:48:09 +0200 Subject: [PATCH 157/216] Malware detection fixes (#144) Toolset changed to v142 --- CLEO5.vcxproj | 4 
++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CLEO5.vcxproj b/CLEO5.vcxproj index 6b9583e9..0bb1528a 100644 --- a/CLEO5.vcxproj +++ b/CLEO5.vcxproj @@ -114,14 +114,14 @@ DynamicLibrary false MultiByte - v143 + v142 true DynamicLibrary true MultiByte - v143 + v142 From cec7939f949e8370a0423c291990109ffb3da89a Mon Sep 17 00:00:00 2001 From: Seemann Date: Fri, 7 Jun 2024 09:19:41 -0400 Subject: [PATCH 158/216] disable code signing (#145) --- .github/workflows/main.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 25b3633c..2f51f061 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -32,14 +32,14 @@ jobs: msbuild -m CLEO5.sln /property:Configuration=Release /property:Platform=GTASA msbuild -m cleo_plugins/CLEO_Plugins.sln /property:Configuration=Release /property:Platform=x86 - - uses: x87/code-sign-action@develop - with: - certificate: '${{ secrets.DIG_KEY_CERT }}' - password: '${{ secrets.DIG_KEY_PWD }}' - certificatename: 'Seemann' - description: 'CLEO 5' - timestampUrl: 'http://timestamp.digicert.com' - filename: './.output/Release/cleo.asi' + # - uses: x87/code-sign-action@develop + # with: + # certificate: '${{ secrets.DIG_KEY_CERT }}' + # password: '${{ secrets.DIG_KEY_PWD }}' + # certificatename: 'Seemann' + # description: 'CLEO 5' + # timestampUrl: 'http://timestamp.digicert.com' + # filename: './.output/Release/cleo.asi' - name: Prepare Base Files id: prepare_archive From 7694d3a488467ed1bbb7706852945369f355ef5f Mon Sep 17 00:00:00 2001 From: Seemann Date: Fri, 7 Jun 2024 10:51:39 -0400 Subject: [PATCH 159/216] use VS 2019 (#146) --- .github/workflows/main.yml | 2 +- .github/workflows/test.yml | 4 ++-- cleo_plugins/Audio/Audio.vcxproj | 4 ++-- cleo_plugins/DebugUtils/DebugUtils.vcxproj | 4 ++-- .../FileSystemOperations/FileSystemOperations.vcxproj | 4 ++-- cleo_plugins/IniFiles/IniFiles.vcxproj | 4 ++-- 
cleo_plugins/Math/Math.vcxproj | 4 ++-- cleo_plugins/MemoryOperations/MemoryOperations.vcxproj | 4 ++-- cleo_plugins/Text/Text.vcxproj | 4 ++-- 9 files changed, 17 insertions(+), 17 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 2f51f061..810ec23f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -7,7 +7,7 @@ on: jobs: build: - runs-on: windows-latest + runs-on: windows-2019 steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1292b1ff..1237ea84 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -10,7 +10,7 @@ on: jobs: build: - runs-on: windows-latest + runs-on: windows-2019 steps: - uses: actions/checkout@v4 @@ -49,4 +49,4 @@ jobs: .output\Release\* !.output\Release\*.pdb !.output\Release\*.lib - !.output\Release\*.exp \ No newline at end of file + !.output\Release\*.exp diff --git a/cleo_plugins/Audio/Audio.vcxproj b/cleo_plugins/Audio/Audio.vcxproj index f1762915..d69a0176 100644 --- a/cleo_plugins/Audio/Audio.vcxproj +++ b/cleo_plugins/Audio/Audio.vcxproj @@ -23,14 +23,14 @@ DynamicLibrary false MultiByte - v143 + v142 true DynamicLibrary true MultiByte - v143 + v142 diff --git a/cleo_plugins/DebugUtils/DebugUtils.vcxproj b/cleo_plugins/DebugUtils/DebugUtils.vcxproj index 536eb3b5..4225492b 100644 --- a/cleo_plugins/DebugUtils/DebugUtils.vcxproj +++ b/cleo_plugins/DebugUtils/DebugUtils.vcxproj @@ -22,14 +22,14 @@ DynamicLibrary false MultiByte - v143 + v142 true DynamicLibrary true MultiByte - v143 + v142 diff --git a/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj b/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj index 035bcfa0..d99498af 100644 --- a/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj @@ -22,14 +22,14 @@ DynamicLibrary false MultiByte - v143 + v142 true DynamicLibrary true MultiByte - 
v143 + v142 diff --git a/cleo_plugins/IniFiles/IniFiles.vcxproj b/cleo_plugins/IniFiles/IniFiles.vcxproj index 295783ac..cec8188d 100644 --- a/cleo_plugins/IniFiles/IniFiles.vcxproj +++ b/cleo_plugins/IniFiles/IniFiles.vcxproj @@ -22,14 +22,14 @@ DynamicLibrary false MultiByte - v143 + v142 true DynamicLibrary true MultiByte - v143 + v142 diff --git a/cleo_plugins/Math/Math.vcxproj b/cleo_plugins/Math/Math.vcxproj index 98dc5acf..1c53f709 100644 --- a/cleo_plugins/Math/Math.vcxproj +++ b/cleo_plugins/Math/Math.vcxproj @@ -22,14 +22,14 @@ DynamicLibrary false MultiByte - v143 + v142 true DynamicLibrary true MultiByte - v143 + v142 diff --git a/cleo_plugins/MemoryOperations/MemoryOperations.vcxproj b/cleo_plugins/MemoryOperations/MemoryOperations.vcxproj index bb8086a3..aaa67287 100644 --- a/cleo_plugins/MemoryOperations/MemoryOperations.vcxproj +++ b/cleo_plugins/MemoryOperations/MemoryOperations.vcxproj @@ -23,14 +23,14 @@ DynamicLibrary false MultiByte - v143 + v142 true DynamicLibrary true MultiByte - v143 + v142 diff --git a/cleo_plugins/Text/Text.vcxproj b/cleo_plugins/Text/Text.vcxproj index c3ba88eb..b00fcba9 100644 --- a/cleo_plugins/Text/Text.vcxproj +++ b/cleo_plugins/Text/Text.vcxproj @@ -22,14 +22,14 @@ DynamicLibrary false MultiByte - v143 + v142 true DynamicLibrary true MultiByte - v143 + v142 From 7795c5c0582901d73409908f6ec311f011dac90d Mon Sep 17 00:00:00 2001 From: Seemann Date: Fri, 7 Jun 2024 11:11:20 -0400 Subject: [PATCH 160/216] switch to older Win 10 SDK (#147) --- CLEO5.vcxproj | 2 +- cleo_plugins/Audio/Audio.vcxproj | 2 +- cleo_plugins/DebugUtils/DebugUtils.vcxproj | 2 +- cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj | 2 +- cleo_plugins/IniFiles/IniFiles.vcxproj | 2 +- cleo_plugins/Math/Math.vcxproj | 2 +- cleo_plugins/MemoryOperations/MemoryOperations.vcxproj | 2 +- cleo_plugins/Text/Text.vcxproj | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/CLEO5.vcxproj b/CLEO5.vcxproj index 0bb1528a..cb4e6030 
100644 --- a/CLEO5.vcxproj +++ b/CLEO5.vcxproj @@ -106,7 +106,7 @@ true Win32Proj CLEO5 - 10.0 + 10.0.18362.0 CLEO5 diff --git a/cleo_plugins/Audio/Audio.vcxproj b/cleo_plugins/Audio/Audio.vcxproj index d69a0176..855eb459 100644 --- a/cleo_plugins/Audio/Audio.vcxproj +++ b/cleo_plugins/Audio/Audio.vcxproj @@ -15,7 +15,7 @@ true Win32Proj Audio - 10.0 + 10.0.18362.0 Audio diff --git a/cleo_plugins/DebugUtils/DebugUtils.vcxproj b/cleo_plugins/DebugUtils/DebugUtils.vcxproj index 4225492b..b4cc0f6b 100644 --- a/cleo_plugins/DebugUtils/DebugUtils.vcxproj +++ b/cleo_plugins/DebugUtils/DebugUtils.vcxproj @@ -15,7 +15,7 @@ true Win32Proj DebugUtils - 10.0 + 10.0.18362.0 diff --git a/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj b/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj index d99498af..03365ec6 100644 --- a/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj @@ -15,7 +15,7 @@ true Win32Proj FileSystemOperations - 10.0 + 10.0.18362.0 diff --git a/cleo_plugins/IniFiles/IniFiles.vcxproj b/cleo_plugins/IniFiles/IniFiles.vcxproj index cec8188d..d2c86010 100644 --- a/cleo_plugins/IniFiles/IniFiles.vcxproj +++ b/cleo_plugins/IniFiles/IniFiles.vcxproj @@ -15,7 +15,7 @@ true Win32Proj IniFiles - 10.0 + 10.0.18362.0 diff --git a/cleo_plugins/Math/Math.vcxproj b/cleo_plugins/Math/Math.vcxproj index 1c53f709..23ce4620 100644 --- a/cleo_plugins/Math/Math.vcxproj +++ b/cleo_plugins/Math/Math.vcxproj @@ -15,7 +15,7 @@ true Win32Proj Math - 10.0 + 10.0.18362.0 diff --git a/cleo_plugins/MemoryOperations/MemoryOperations.vcxproj b/cleo_plugins/MemoryOperations/MemoryOperations.vcxproj index aaa67287..b14e5990 100644 --- a/cleo_plugins/MemoryOperations/MemoryOperations.vcxproj +++ b/cleo_plugins/MemoryOperations/MemoryOperations.vcxproj @@ -15,7 +15,7 @@ true Win32Proj MemoryOperations - 10.0 + 10.0.18362.0 MemoryOperations diff --git a/cleo_plugins/Text/Text.vcxproj 
b/cleo_plugins/Text/Text.vcxproj index b00fcba9..7eed1c17 100644 --- a/cleo_plugins/Text/Text.vcxproj +++ b/cleo_plugins/Text/Text.vcxproj @@ -15,7 +15,7 @@ true Win32Proj Text - 10.0 + 10.0.18362.0 From 7be8ff379afda9f9eec349d49cd5ca07b4fefc1c Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Tue, 11 Jun 2024 06:43:06 +0200 Subject: [PATCH 161/216] Malware detection fixes (#149) * Squashed and cleaned * Dynamic linked redistributables * fixup! Dynamic linked redistributables * Removed VirusTotal step * fixup! Removed VirusTotal step * Updated setup-msbuild to v2 * Removed SHQueryUserNotificationState use. * fixup! Removed SHQueryUserNotificationState use. * Runtime static linking. * yml updated * Trigger modified * fixup! Trigger modified * No signing in test --- .github/workflows/main.yml | 40 ++++++++++++++++++++++----------- .github/workflows/test.yml | 22 ++++++++++++++---- CLEO5.vcxproj | 5 +++-- cleo_sdk/CLEO_Utils.h | 6 ++--- source/CCustomOpcodeSystem.cpp | 7 ++++-- source/CLEO.ico | Bin 0 -> 5430 bytes source/CLEO5.rc | Bin 4092 -> 4102 bytes 7 files changed, 56 insertions(+), 24 deletions(-) create mode 100644 source/CLEO.ico diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 810ec23f..6f285487 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -15,7 +15,7 @@ jobs: submodules: "recursive" - name: Add msbuild to PATH - uses: microsoft/setup-msbuild@v1 + uses: microsoft/setup-msbuild@v2 - uses: actions/setup-node@v4 with: @@ -25,23 +25,37 @@ jobs: id: read_version run: node.exe .github/workflows/version.js - - name: Build Projects + - name: Core - Build shell: cmd run: | set PLUGIN_SDK_DIR=%GITHUB_WORKSPACE%\third-party\plugin-sdk msbuild -m CLEO5.sln /property:Configuration=Release /property:Platform=GTASA - msbuild -m cleo_plugins/CLEO_Plugins.sln /property:Configuration=Release /property:Platform=x86 - # - uses: x87/code-sign-action@develop - # with: - # certificate: '${{ secrets.DIG_KEY_CERT }}' - # 
password: '${{ secrets.DIG_KEY_PWD }}' - # certificatename: 'Seemann' - # description: 'CLEO 5' - # timestampUrl: 'http://timestamp.digicert.com' - # filename: './.output/Release/cleo.asi' + - name: Core - Sign + uses: x87/code-sign-action@develop + with: + certificate: '${{ secrets.DIG_KEY_CERT }}' + password: '${{ secrets.DIG_KEY_PWD }}' + certificatename: 'Seemann' + description: 'CLEO 5' + timestampUrl: 'http://timestamp.digicert.com' + filename: './.output/Release/cleo.asi' + + - name: Core - VirusTotal Scan + uses: crazy-max/ghaction-virustotal@v4 + with: + vt_api_key: ${{ secrets.VT_KEY }} + files: | + ./.output/Release/CLEO.asi + + - name: Plugins - Build + shell: cmd + run: | + set PLUGIN_SDK_DIR=%GITHUB_WORKSPACE%\third-party\plugin-sdk + msbuild -m cleo_plugins/CLEO_Plugins.sln /property:Configuration=Release /property:Platform=x86 + msbuild -m cleo_plugins/CLEO_Plugins.sln /property:Configuration=Release /property:Platform=x86 - - name: Prepare Base Files + - name: Gather Output Files id: prepare_archive shell: cmd run: | @@ -109,7 +123,7 @@ jobs: type: "zip" filename: SA.CLEO_${{ github.ref_name }}+Silent_ASI_Loader.zip exclusions: "*.pdb *.lib *.exp" - + - name: Pack Base + UAL uses: ThirteenAG/zip-release@master with: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1237ea84..8ac2a245 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -6,6 +6,7 @@ on: - ".github/*" - "*.md" pull_request: + types: [opened, reopened] workflow_dispatch: jobs: @@ -18,16 +19,28 @@ jobs: submodules: "recursive" - name: Add msbuild to PATH - uses: microsoft/setup-msbuild@v1 + uses: microsoft/setup-msbuild@v2 - - name: Build Projects + - name: Core - Build shell: cmd run: | set PLUGIN_SDK_DIR=%GITHUB_WORKSPACE%\third-party\plugin-sdk msbuild -m CLEO5.sln /property:Configuration=Release /property:Platform=GTASA + + - name: Core - VirusTotal Scan + uses: crazy-max/ghaction-virustotal@v4 + with: + vt_api_key: ${{ secrets.VT_KEY 
}} + files: | + ./.output/Release/CLEO.asi + + - name: Plugins - Build + shell: cmd + run: | + set PLUGIN_SDK_DIR=%GITHUB_WORKSPACE%\third-party\plugin-sdk msbuild -m cleo_plugins/CLEO_Plugins.sln /property:Configuration=Release /property:Platform=x86 - - name: Prepare Files + - name: Gather Output Files id: prepare_archive shell: cmd run: | @@ -41,7 +54,8 @@ jobs: copy cleo_plugins\.output\*.cleo .output\Release\cleo\cleo_plugins copy cleo_plugins\.output\*.ini .output\Release\cleo\cleo_plugins - - uses: actions/upload-artifact@v4 + - name: Upload Result + uses: actions/upload-artifact@v4 with: compression-level: 0 name: SA.CLEO5 diff --git a/CLEO5.vcxproj b/CLEO5.vcxproj index cb4e6030..ce5d0463 100644 --- a/CLEO5.vcxproj +++ b/CLEO5.vcxproj @@ -166,23 +166,24 @@ /Zc:threadSafeInit- %(AdditionalOptions) Create stdcpp17 + None true true - true + false UseLinkTimeCodeGeneration %(AdditionalDependencies) Windows $(SolutionDir)source\cleo.def false + /Brepro %(AdditionalOptions) xcopy /Y "$(OutDir)$(TargetName).lib" "$(SolutionDir)cleo_sdk\" if defined GTA_SA_DIR ( taskkill /IM gta_sa.exe /F /FI "STATUS eq RUNNING" xcopy /Y "$(OutDir)$(TargetName).asi" "$(GTA_SA_DIR)\" -xcopy /Y "$(OutDir)$(TargetName).pdb" "$(GTA_SA_DIR)\" ) diff --git a/cleo_sdk/CLEO_Utils.h b/cleo_sdk/CLEO_Utils.h index 98c41519..9e20b316 100644 --- a/cleo_sdk/CLEO_Utils.h +++ b/cleo_sdk/CLEO_Utils.h @@ -186,9 +186,9 @@ namespace CLEO auto msg = TraceVArg(CLEO::eLogLevel::Error, format, args); va_end(args); - QUERY_USER_NOTIFICATION_STATE pquns; - SHQueryUserNotificationState(&pquns); - bool fullscreen = (pquns == QUNS_BUSY) || (pquns == QUNS_RUNNING_D3D_FULL_SCREEN) || (pquns == QUNS_PRESENTATION_MODE); + auto mainWnd = (HWND*)0x001C9055C; // PluginSDK: RsGlobal.ps->window + auto style = GetWindowLong(*mainWnd, GWL_STYLE); + bool fullscreen = (style & (WS_BORDER | WS_CAPTION)) != 0; if (fullscreen) { diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 
5390d688..105292e3 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -962,8 +962,11 @@ namespace CLEO { DWORD key; *thread >> key; - SHORT state = GetKeyState(key); - SetScriptCondResult(thread, (GetKeyState(key) & 0x8000) != 0); + + SHORT(__stdcall * GTA_GetKeyState)(int nVirtKey) = memory_pointer(0x0081E64C); // use ingame function as GetKeyState might look like keylogger to some AV software + bool isDown = (GTA_GetKeyState(key) & 0x8000) != 0; + + SetScriptCondResult(thread, isDown); return OR_CONTINUE; } diff --git a/source/CLEO.ico b/source/CLEO.ico new file mode 100644 index 0000000000000000000000000000000000000000..5ba170941b326bb7cbfaab73f559ef4fa96d1c67 GIT binary patch literal 5430 zcmds5d2p5075@UI(W2IAr~RXyk+@bUZAXjR3PK2p5kd@tKp-Im5DGS6si^qJ$~Jz4r=K#t2)#BM6TOf)EpPy?ufD^QiAp>Z2-gT<@pi+tN%DSWQ?x}u_@rawFPH(E_|Bf>TV zLaGi78C4k2c4OG8!+@>_9p{UY6c?*y-ibQyyz|beB?}j5y{c}EI4!WZU54yfE_QBz z`Qz-DHjK%Rj(!U$avxx zKkraptVOR{j@Akps6U$^(zbPJ9|Sc@xRi3tedn@(d+LpRH4A>obH7n~QY9G+Gwqe-gMe6e9;g!kHhn+ogC@(d#{DIV@C7%Q|DqK2u zmfPu!`mzf2+nw0`>T4f}^6i}n8Z9KhBK*SQq%e=EE;lp(+Z)p}KCvr}s4uVPb{b(; z*{QSe>CHHP=pF7`S5$BUeO4#@Mk@j)n^-@6G+!K}@f-d};zih;o;l{yn4oFy!t94< z^4wl8IQ%YjoeFd}G^3E})VFKSuO9Cx%sGneZQFVMd<2QI>q~23SD4V%*p6TS@^NYZ z-X?|vg!CsKo72(N*an}`iKglX)^}t7rca+96*q68)+9H=rE|ihFrqN05SueLf3$VO z*0D2rr(soF5VQwiQJL}FqD1YqY16(G#~hPMiAmZ^RhMB?Ibc=U;W7E(w+7MY3c+s+ zqOqn4@v#a27QfOYqNAf(XZG(lZ5t~(Awy+xEo#axqV#M9Ufz=RA&=dC_uUiYRuY}8 zK`JRa(#1i;E`hTD)Q>IL#dBhQ3 zBNW|)UDS>z9$JC_I`N$=BYaB0%A`c>ShpGl`*w@S&P>DdgoPZtjL!(A1eW=ah+_8# zq@bLDjcZn+p6+XB%SDXZ4G3E_xau?_Y*u5~q{67f0H2}_4QC6HxpKMWO9kop;cYrG zkK{&D;uoU5q8K0cdN5+sV^A+gzq$hJ2I9~Kb*_?uZ<5vK3P$3r1pby?_JBNq`;cY?v+qhYK5dTF6=k+ug{$e&($ zD|~GY@b|Q0-}bHS!#H97E&9N|jvsyFb&R^K_#o)TsN04iGtE;gC;d98FI_-mc?pV- z=c6d^2x?CiP+u(u3@Z3KE+f$041ZTMdX*gr%G+>k?@q~wuU&`8UXAX}+J-@s4kKiD zz@R4EI{4-7s46&uxZgg-x)`Gbdjuogw_wg=s4qSPZ)Y=no$nz)G7NurEB0>N%raNM zAs#B0El#93>co)MNV4R=vJ1w>3)pZyrg$Cm2r~#b6Zah>jHjn8g-hOv0O?`)d)nb_ 
zYr+dl7PAlKgvp5SMYL9yBc#{B*VBPs`gYqpTd^wTd6u&hVhGY%9HTn-Gq}!Yj&;l+ zn3p9cz^&*;P_00pR)wHSjwabTiGQ-QS+Z$eI{M9Yj^tekkX$eMu_a?2%UEgMzDFhB zG5=VU{^4VpT2 zDrJVkUn&ao5!7nP2NOno9#o$`$^5+LF@)U&R21aV{`4SZGGNGIgSV#(ew6|tqaIZ! zPe}UTAl?~-@%Y$S?)y8ToxpcyH{o--C%;RjL<^}xd4T}|%5R1ocJx~q4bfc(DVa-z0K%wMh%Kd&*zgO>19!ZN}ZLNehITB9FR zDbZ`olaoo_1)oZVD;_u0En;57_G1X88`IJd)a%K9j|i_)iJ(Cb7qwZA^RX*#H~FK% z+LRQQlaY-dQF$N1ML<$Q9QQN$ThIF`QPJz$caU8VLKX`K9S+pVN|}$(7}Ho?S-F_Y z^_eXQ==8W;S&2s;cmNOHcOUABx6fpzn3u!S+yYr%9?Nn}GF7I;$H7TwY0#vhcy2-b zyr+4*P#nGfQ&f?Cpbf&dB^3O(me$1XVi*+0(9C~O* z3|tBooZWKL--WKadLCmieO8{IPwPtSrPql$Yt@-EJT@ebUjHe|KXjOKOEU(YZg@3% zvM=U#|FYlI47XB^fZha;T8oEc?w9Q6%$_+5VSfOQZUww*9psHo6YYO~{`5I`G<2U( z&dK@c`7`1<9GqzX<;;yVKO=h0l%J{fXsD`T9m5fxvAXi}7;;nIW-yC<4b*=0p@;Fa z2OmN;=^O10VbJA8fb!$&VsY%~91Z;lFT{Vb(T8!)b3HEotn6iv?Ti_@E zJQ@R9tLvmWaQu|Ml#xj``VlZ#;Uz!a2zx>J#suPb0Y;b>E=U^p&YFwJG5<*h#+))*c;F zm%Ye3Es^!wN%d=KD^{a8{}dE0o#?vMg0hoE$V|&%KE8iR*$HQ%f`x!5eddm*_uiN!n0`Qt?zlf*wWCfmGn8>zyZ*FzoU-B=qR~#=`y(%<KNn4Muic)p|B)1!!qLF~ zC|nbSx4saB1%IXc<+xlga&D!IOo!bT%7s HpJ4$2(DobE delta 90 zcmZou_#?l8iFLCTYYQ{4GebT@E<*uBB10ZSueU`T*k;~Jb5=) qu)H3F0z(Ev2@n=A6f;;c=mTLgLk>eKP_rIG&g6V<{moX~XIKCr<`yRa From 608c25c9d2aa5e9c92273efc831cd9ca43a442e2 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Tue, 11 Jun 2024 18:32:18 +0200 Subject: [PATCH 162/216] Malware detection fixes (#150) --- .github/workflows/main.yml | 22 ++++++++++++++++-- .github/workflows/test.yml | 14 ++++++++++- CLEO5.vcxproj | 6 ++--- cleo_plugins/Audio/Audio.vcxproj | 18 ++++++++++---- cleo_plugins/Audio/Audio.vcxproj.filters | 3 +++ cleo_plugins/DebugUtils/DebugUtils.cpp | 6 +++-- cleo_plugins/DebugUtils/DebugUtils.vcxproj | 18 ++++++++++---- .../DebugUtils/DebugUtils.vcxproj.filters | 3 +++ .../FileSystemOperations.vcxproj | 18 ++++++++++---- .../FileSystemOperations.vcxproj.filters | 3 +++ cleo_plugins/IniFiles/IniFiles.vcxproj | 18 ++++++++++---- .../IniFiles/IniFiles.vcxproj.filters | 3 +++ cleo_plugins/Math/Math.vcxproj | 18 
++++++++++---- cleo_plugins/Math/Math.vcxproj.filters | 3 +++ .../MemoryOperations/MemoryOperations.vcxproj | 18 ++++++++++---- .../MemoryOperations.vcxproj.filters | 3 +++ cleo_plugins/Resource.rc | Bin 0 -> 1538 bytes cleo_plugins/Text/Text.vcxproj | 18 ++++++++++---- cleo_plugins/Text/Text.vcxproj.filters | 3 +++ 19 files changed, 159 insertions(+), 36 deletions(-) create mode 100644 cleo_plugins/Resource.rc diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 6f285487..d328911d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -7,7 +7,7 @@ on: jobs: build: - runs-on: windows-2019 + runs-on: windows-2022 steps: - uses: actions/checkout@v4 @@ -47,7 +47,7 @@ jobs: vt_api_key: ${{ secrets.VT_KEY }} files: | ./.output/Release/CLEO.asi - + - name: Plugins - Build shell: cmd run: | @@ -55,6 +55,24 @@ jobs: msbuild -m cleo_plugins/CLEO_Plugins.sln /property:Configuration=Release /property:Platform=x86 msbuild -m cleo_plugins/CLEO_Plugins.sln /property:Configuration=Release /property:Platform=x86 + - name: Plugins - Sign + uses: x87/code-sign-action@develop + with: + certificate: '${{ secrets.DIG_KEY_CERT }}' + password: '${{ secrets.DIG_KEY_PWD }}' + certificatename: 'Seemann' + description: 'CLEO 5 Plugin' + timestampUrl: 'http://timestamp.digicert.com' + folder: './cleo_plugins/.output' + recursive: true + + - name: Plugins - VirusTotal Scan + uses: crazy-max/ghaction-virustotal@v4 + with: + vt_api_key: ${{ secrets.VT_KEY }} + files: | + ./cleo_plugins/.output/*.cleo + - name: Gather Output Files id: prepare_archive shell: cmd diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8ac2a245..b40a3317 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -11,12 +11,17 @@ on: jobs: build: - runs-on: windows-2019 + runs-on: windows-2022 steps: - uses: actions/checkout@v4 with: submodules: "recursive" + +# - name: Select Windows SDK +# uses: 
GuillaumeFalourd/setup-windows10-sdk-action@v2 +# with: +# sdk-version: 22000 # 18362, 19041, 20348, 22000, 22621 - name: Add msbuild to PATH uses: microsoft/setup-msbuild@v2 @@ -39,6 +44,13 @@ jobs: run: | set PLUGIN_SDK_DIR=%GITHUB_WORKSPACE%\third-party\plugin-sdk msbuild -m cleo_plugins/CLEO_Plugins.sln /property:Configuration=Release /property:Platform=x86 + + - name: Plugins - VirusTotal Scan + uses: crazy-max/ghaction-virustotal@v4 + with: + vt_api_key: ${{ secrets.VT_KEY }} + files: | + ./cleo_plugins/.output/*.cleo - name: Gather Output Files id: prepare_archive diff --git a/CLEO5.vcxproj b/CLEO5.vcxproj index ce5d0463..4f15205e 100644 --- a/CLEO5.vcxproj +++ b/CLEO5.vcxproj @@ -106,7 +106,7 @@ true Win32Proj CLEO5 - 10.0.18362.0 + 10.0 CLEO5 @@ -114,14 +114,14 @@ DynamicLibrary false MultiByte - v142 + v143 true DynamicLibrary true MultiByte - v142 + v143 diff --git a/cleo_plugins/Audio/Audio.vcxproj b/cleo_plugins/Audio/Audio.vcxproj index 855eb459..045b59e9 100644 --- a/cleo_plugins/Audio/Audio.vcxproj +++ b/cleo_plugins/Audio/Audio.vcxproj @@ -15,7 +15,7 @@ true Win32Proj Audio - 10.0.18362.0 + 10.0 Audio @@ -23,14 +23,14 @@ DynamicLibrary false MultiByte - v142 + v143 true DynamicLibrary true MultiByte - v142 + v143 @@ -74,11 +74,12 @@ /Zc:threadSafeInit- %(AdditionalOptions) $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared;$(PLUGIN_SDK_DIR)\shared\game;$(SolutionDir)..\cleo_sdk;$(ProjectDir)\bass\;%(AdditionalIncludeDirectories) stdcpp17 + None true true - true + false UseLinkTimeCodeGeneration $(PLUGIN_SDK_DIR)\output\lib\;$(SolutionDir)..\cleo_sdk\;$(ProjectDir)bass\;%(AdditionalLibraryDirectories) cleo.lib;bass.lib;%(AdditionalDependencies) @@ -89,6 +90,9 @@ xcopy /Y "$(ProjectDir)*.ini" "$(OutDir)" xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" + + TARGET_NAME=$(TargetFileName) + @@ -113,6 +117,9 @@ xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" + + 
TARGET_NAME=$(TargetFileName) + @@ -158,6 +165,9 @@ xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" + + + diff --git a/cleo_plugins/Audio/Audio.vcxproj.filters b/cleo_plugins/Audio/Audio.vcxproj.filters index 9d593543..71b9e853 100644 --- a/cleo_plugins/Audio/Audio.vcxproj.filters +++ b/cleo_plugins/Audio/Audio.vcxproj.filters @@ -73,4 +73,7 @@ + + + \ No newline at end of file diff --git a/cleo_plugins/DebugUtils/DebugUtils.cpp b/cleo_plugins/DebugUtils/DebugUtils.cpp index 408e420c..fe72ebe6 100644 --- a/cleo_plugins/DebugUtils/DebugUtils.cpp +++ b/cleo_plugins/DebugUtils/DebugUtils.cpp @@ -74,6 +74,8 @@ class DebugUtils static void WINAPI OnDrawingFinished() { + auto GTA_GetKeyState = (SHORT (__stdcall*)(int))0x0081E64C; // use ingame function as GetKeyState might look like keylogger to some AV software + // log messages screenLog.Draw(); @@ -109,7 +111,7 @@ class DebugUtils keysReleased = true; for (size_t i = 0; i < KeyCount; i++) { - auto state = GetKeyState(KeyFirst + i); + auto state = GTA_GetKeyState(KeyFirst + i); if (state & 0x8000) // key down { keysReleased = false; @@ -122,7 +124,7 @@ class DebugUtils const size_t count = min(pausedScripts.size(), KeyCount); for (size_t i = 0; i < count; i++) { - auto state = GetKeyState(KeyFirst + i); + auto state = GTA_GetKeyState(KeyFirst + i); if (state & 0x8000) // key down { keysReleased = false; diff --git a/cleo_plugins/DebugUtils/DebugUtils.vcxproj b/cleo_plugins/DebugUtils/DebugUtils.vcxproj index b4cc0f6b..8c4e9332 100644 --- a/cleo_plugins/DebugUtils/DebugUtils.vcxproj +++ b/cleo_plugins/DebugUtils/DebugUtils.vcxproj @@ -15,21 +15,21 @@ true Win32Proj DebugUtils - 10.0.18362.0 + 10.0 DynamicLibrary false MultiByte - v142 + v143 true DynamicLibrary true MultiByte - v142 + v143 @@ -71,11 +71,12 @@ _NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San 
Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions);TARGET_NAME=R"($(TargetName))" /Zc:threadSafeInit- %(AdditionalOptions) stdcpp17 + None true true - true + false UseLinkTimeCodeGeneration $(PLUGIN_SDK_DIR)\output\lib\;$(SolutionDir)..\cleo_sdk;%(AdditionalLibraryDirectories) cleo.lib;%(AdditionalDependencies) @@ -89,6 +90,9 @@ xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" ) + + TARGET_NAME=$(TargetFileName) + @@ -116,6 +120,9 @@ xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" ) + + TARGET_NAME=$(TargetFileName) + @@ -134,6 +141,9 @@ xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" + + + diff --git a/cleo_plugins/DebugUtils/DebugUtils.vcxproj.filters b/cleo_plugins/DebugUtils/DebugUtils.vcxproj.filters index b06f5016..699e95c5 100644 --- a/cleo_plugins/DebugUtils/DebugUtils.vcxproj.filters +++ b/cleo_plugins/DebugUtils/DebugUtils.vcxproj.filters @@ -36,4 +36,7 @@ + + + \ No newline at end of file diff --git a/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj b/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj index 03365ec6..96f199fa 100644 --- a/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj @@ -15,21 +15,21 @@ true Win32Proj FileSystemOperations - 10.0.18362.0 + 10.0 DynamicLibrary false MultiByte - v142 + v143 true DynamicLibrary true MultiByte - v142 + v143 @@ -71,11 +71,12 @@ /Zc:threadSafeInit- %(AdditionalOptions) $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared;$(PLUGIN_SDK_DIR)\shared\game;$(SolutionDir)..\cleo_sdk;%(AdditionalIncludeDirectories) stdcpp17 + None true true - true + false UseLinkTimeCodeGeneration $(PLUGIN_SDK_DIR)\output\lib\;$(SolutionDir)..\cleo_sdk;%(AdditionalLibraryDirectories) cleo.lib;%(AdditionalDependencies) @@ -87,6 +88,9 @@ if 
defined GTA_SA_DIR ( xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" ) + + TARGET_NAME=$(TargetFileName) + @@ -112,6 +116,9 @@ if defined GTA_SA_DIR ( xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" ) + + TARGET_NAME=$(TargetFileName) + @@ -123,6 +130,9 @@ if defined GTA_SA_DIR ( + + + diff --git a/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj.filters b/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj.filters index 33fef5b0..5f134c6f 100644 --- a/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj.filters +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.vcxproj.filters @@ -19,4 +19,7 @@ {a2c39c52-f49e-4ffe-bb0a-661ab07131b9} + + + \ No newline at end of file diff --git a/cleo_plugins/IniFiles/IniFiles.vcxproj b/cleo_plugins/IniFiles/IniFiles.vcxproj index d2c86010..05e4b2b3 100644 --- a/cleo_plugins/IniFiles/IniFiles.vcxproj +++ b/cleo_plugins/IniFiles/IniFiles.vcxproj @@ -15,21 +15,21 @@ true Win32Proj IniFiles - 10.0.18362.0 + 10.0 DynamicLibrary false MultiByte - v142 + v143 true DynamicLibrary true MultiByte - v142 + v143 @@ -71,11 +71,12 @@ _NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions);TARGET_NAME=R"($(TargetName))" /Zc:threadSafeInit- %(AdditionalOptions) stdcpp17 + None true true - true + false UseLinkTimeCodeGeneration $(PLUGIN_SDK_DIR)\output\lib\;$(SolutionDir)..\cleo_sdk;%(AdditionalLibraryDirectories) cleo.lib;%(AdditionalDependencies) @@ -87,6 +88,9 @@ if defined GTA_SA_DIR ( xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" ) + + TARGET_NAME=$(TargetFileName) + @@ -112,10 +116,16 @@ if defined GTA_SA_DIR ( xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" ) + + TARGET_NAME=$(TargetFileName) + + + + diff --git 
a/cleo_plugins/IniFiles/IniFiles.vcxproj.filters b/cleo_plugins/IniFiles/IniFiles.vcxproj.filters index a93178c2..164b15c4 100644 --- a/cleo_plugins/IniFiles/IniFiles.vcxproj.filters +++ b/cleo_plugins/IniFiles/IniFiles.vcxproj.filters @@ -3,4 +3,7 @@ + + + \ No newline at end of file diff --git a/cleo_plugins/Math/Math.vcxproj b/cleo_plugins/Math/Math.vcxproj index 23ce4620..7c8954a1 100644 --- a/cleo_plugins/Math/Math.vcxproj +++ b/cleo_plugins/Math/Math.vcxproj @@ -15,21 +15,21 @@ true Win32Proj Math - 10.0.18362.0 + 10.0 DynamicLibrary false MultiByte - v142 + v143 true DynamicLibrary true MultiByte - v142 + v143 @@ -71,11 +71,12 @@ _NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions);TARGET_NAME=R"($(TargetName))" /Zc:threadSafeInit- %(AdditionalOptions) stdcpp17 + None true true - true + false UseLinkTimeCodeGeneration $(PLUGIN_SDK_DIR)\output\lib\;$(SolutionDir)..\cleo_sdk;%(AdditionalLibraryDirectories) cleo.lib;%(AdditionalDependencies) @@ -87,6 +88,9 @@ if defined GTA_SA_DIR ( xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" ) + + TARGET_NAME=$(TargetFileName) + @@ -112,6 +116,9 @@ if defined GTA_SA_DIR ( xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" ) + + TARGET_NAME=$(TargetFileName) + @@ -120,6 +127,9 @@ xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" + + + diff --git a/cleo_plugins/Math/Math.vcxproj.filters b/cleo_plugins/Math/Math.vcxproj.filters index 7b248992..b5865d2b 100644 --- a/cleo_plugins/Math/Math.vcxproj.filters +++ b/cleo_plugins/Math/Math.vcxproj.filters @@ -16,4 +16,7 @@ cleo_sdk + + + \ No newline at end of file diff --git a/cleo_plugins/MemoryOperations/MemoryOperations.vcxproj b/cleo_plugins/MemoryOperations/MemoryOperations.vcxproj index b14e5990..8b6a4feb 100644 --- 
a/cleo_plugins/MemoryOperations/MemoryOperations.vcxproj +++ b/cleo_plugins/MemoryOperations/MemoryOperations.vcxproj @@ -15,7 +15,7 @@ true Win32Proj MemoryOperations - 10.0.18362.0 + 10.0 MemoryOperations @@ -23,14 +23,14 @@ DynamicLibrary false MultiByte - v142 + v143 true DynamicLibrary true MultiByte - v142 + v143 @@ -72,11 +72,12 @@ /Zc:threadSafeInit- %(AdditionalOptions) $(PLUGIN_SDK_DIR)\plugin_sa\;$(PLUGIN_SDK_DIR)\plugin_sa\game_sa\;$(PLUGIN_SDK_DIR)\shared;$(PLUGIN_SDK_DIR)\shared\game;$(SolutionDir)..\cleo_sdk;%(AdditionalIncludeDirectories) stdcpp17 + None true true - true + false UseLinkTimeCodeGeneration $(PLUGIN_SDK_DIR)\output\lib\;$(SolutionDir)..\cleo_sdk;%(AdditionalLibraryDirectories) cleo.lib;%(AdditionalDependencies) @@ -88,6 +89,9 @@ if defined GTA_SA_DIR ( xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" ) + + TARGET_NAME=$(TargetFileName) + @@ -113,6 +117,9 @@ if defined GTA_SA_DIR ( xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" ) + + TARGET_NAME=$(TargetFileName) + @@ -142,6 +149,9 @@ if defined GTA_SA_DIR ( + + + diff --git a/cleo_plugins/MemoryOperations/MemoryOperations.vcxproj.filters b/cleo_plugins/MemoryOperations/MemoryOperations.vcxproj.filters index d26f1298..0b8bafb7 100644 --- a/cleo_plugins/MemoryOperations/MemoryOperations.vcxproj.filters +++ b/cleo_plugins/MemoryOperations/MemoryOperations.vcxproj.filters @@ -40,4 +40,7 @@ cleo_sdk + + + \ No newline at end of file diff --git a/cleo_plugins/Resource.rc b/cleo_plugins/Resource.rc new file mode 100644 index 0000000000000000000000000000000000000000..6164065e35dcfb96a82128af8fb9d565ef60fff4 GIT binary patch literal 1538 zcmd6n-*3`T6vw}>nD{?5_@c=)b=hJx#spy_QwGamJS`zGDr>~nP^Rvm*L}``Qd;Kr z;EQQ)@9n+ke81oG*0rz8FVYvGn3+|sO1)zFE$j3y{uxQbQIdI?*KmIeF~oZGB1d+D0l@P_K6ZiuFH zRws@*a)x8pLFL4lZQnd?9Ajzml>LKEl)K2e{ynhsT-3hs%MS-G*EB>rgiNZ>?xgkLzgVc>{#DWQ1>d-H}wj1xP@B9=y?P)k77&& zc^pXzZ9vo`I&MIla>6Q(ooBFp>|7;?jiMg{)S*0)^^_h 
zek)hiTJ`O2P0GFVFUUu9Kx8{6bNx9Pp_*(ulayqVbPN-jgd+ZkN O>Gir+tGEk2#p(_nLDqTz literal 0 HcmV?d00001 diff --git a/cleo_plugins/Text/Text.vcxproj b/cleo_plugins/Text/Text.vcxproj index 7eed1c17..86bc04e1 100644 --- a/cleo_plugins/Text/Text.vcxproj +++ b/cleo_plugins/Text/Text.vcxproj @@ -15,21 +15,21 @@ true Win32Proj Text - 10.0.18362.0 + 10.0 DynamicLibrary false MultiByte - v142 + v143 true DynamicLibrary true MultiByte - v142 + v143 @@ -71,11 +71,12 @@ _NDEBUG;_USING_V110_SDK71_;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS;GTASA;GTAGAME_NAME="San Andreas";GTAGAME_ABBR="SA";GTAGAME_ABBRLOW="sa";GTAGAME_PROTAGONISTNAME="CJ";GTAGAME_CITYNAME="San Andreas";%(PreprocessorDefinitions);TARGET_NAME=R"($(TargetName))" /Zc:threadSafeInit- %(AdditionalOptions) stdcpp17 + None true true - true + false UseLinkTimeCodeGeneration $(PLUGIN_SDK_DIR)\output\lib\;$(SolutionDir)..\cleo_sdk;%(AdditionalLibraryDirectories) cleo.lib;Shlwapi.lib;%(AdditionalDependencies) @@ -87,6 +88,9 @@ if defined GTA_SA_DIR ( xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" ) + + TARGET_NAME=$(TargetFileName) + @@ -112,6 +116,9 @@ if defined GTA_SA_DIR ( xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" ) + + TARGET_NAME=$(TargetFileName) + @@ -135,6 +142,9 @@ xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" + + + diff --git a/cleo_plugins/Text/Text.vcxproj.filters b/cleo_plugins/Text/Text.vcxproj.filters index 81b0b82e..5543202f 100644 --- a/cleo_plugins/Text/Text.vcxproj.filters +++ b/cleo_plugins/Text/Text.vcxproj.filters @@ -56,4 +56,7 @@ {72d65844-4b83-4ef4-9736-ff17836a8cfa} + + + \ No newline at end of file From 9dfd475cae186ce24908b7cad926efa98a68a880 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sat, 15 Jun 2024 09:48:04 +0200 Subject: [PATCH 163/216] Updates and refactoring (#152) Removed not existing files from solution. Moved Plugin SDK files to correct filer group. Unified github actions. 
Fixed doubled triggering plugins build. Renamed CLEO SDK's container from DirectoryList to StringList. Added initialization methods to CCustomOpcodeSystem and CPluginSystem. Improved look of cleo log by calling initialization manually. Fixed invalid Math plugin filename in readme. Simplified plugins loading. Fixed legacy plugins overwriting new plugins. --- .github/workflows/main.yml | 8 +- .github/workflows/test.yml | 33 +++++--- CLEO5.vcxproj | 1 + CLEO5.vcxproj.filters | 21 +++-- README.md | 2 +- cleo_plugins/Text/CTextManager.cpp | 8 +- cleo_sdk/CLEO.h | 7 +- source/CCustomOpcodeSystem.cpp | 121 ++++++++++++++++------------- source/CCustomOpcodeSystem.h | 5 +- source/CPluginSystem.cpp | 83 ++++++++++++++++++++ source/CPluginSystem.h | 64 ++------------- source/CScriptEngine.cpp | 59 ++++++++------ source/CleoBase.cpp | 3 +- source/FileEnumerator.h | 16 ++-- source/cleo.def | 2 +- 15 files changed, 250 insertions(+), 183 deletions(-) create mode 100644 source/CPluginSystem.cpp diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d328911d..8a440e37 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -45,15 +45,13 @@ jobs: uses: crazy-max/ghaction-virustotal@v4 with: vt_api_key: ${{ secrets.VT_KEY }} - files: | - ./.output/Release/CLEO.asi + files: './.output/Release/CLEO.asi' - name: Plugins - Build shell: cmd run: | set PLUGIN_SDK_DIR=%GITHUB_WORKSPACE%\third-party\plugin-sdk msbuild -m cleo_plugins/CLEO_Plugins.sln /property:Configuration=Release /property:Platform=x86 - msbuild -m cleo_plugins/CLEO_Plugins.sln /property:Configuration=Release /property:Platform=x86 - name: Plugins - Sign uses: x87/code-sign-action@develop @@ -64,14 +62,12 @@ jobs: description: 'CLEO 5 Plugin' timestampUrl: 'http://timestamp.digicert.com' folder: './cleo_plugins/.output' - recursive: true - name: Plugins - VirusTotal Scan uses: crazy-max/ghaction-virustotal@v4 with: vt_api_key: ${{ secrets.VT_KEY }} - files: | - 
./cleo_plugins/.output/*.cleo + files: './cleo_plugins/.output/*.cleo' - name: Gather Output Files id: prepare_archive diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b40a3317..39f32e00 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -17,11 +17,6 @@ jobs: - uses: actions/checkout@v4 with: submodules: "recursive" - -# - name: Select Windows SDK -# uses: GuillaumeFalourd/setup-windows10-sdk-action@v2 -# with: -# sdk-version: 22000 # 18362, 19041, 20348, 22000, 22621 - name: Add msbuild to PATH uses: microsoft/setup-msbuild@v2 @@ -32,25 +27,43 @@ jobs: set PLUGIN_SDK_DIR=%GITHUB_WORKSPACE%\third-party\plugin-sdk msbuild -m CLEO5.sln /property:Configuration=Release /property:Platform=GTASA + - name: Core - Sign + uses: x87/code-sign-action@develop + with: + certificate: '${{ secrets.DIG_KEY_CERT }}' + password: '${{ secrets.DIG_KEY_PWD }}' + certificatename: 'Seemann' + description: 'CLEO 5' + timestampUrl: 'http://timestamp.digicert.com' + filename: './.output/Release/cleo.asi' + - name: Core - VirusTotal Scan uses: crazy-max/ghaction-virustotal@v4 with: vt_api_key: ${{ secrets.VT_KEY }} - files: | - ./.output/Release/CLEO.asi - + files: './.output/Release/CLEO.asi' + - name: Plugins - Build shell: cmd run: | set PLUGIN_SDK_DIR=%GITHUB_WORKSPACE%\third-party\plugin-sdk msbuild -m cleo_plugins/CLEO_Plugins.sln /property:Configuration=Release /property:Platform=x86 + + - name: Plugins - Sign + uses: x87/code-sign-action@develop + with: + certificate: '${{ secrets.DIG_KEY_CERT }}' + password: '${{ secrets.DIG_KEY_PWD }}' + certificatename: 'Seemann' + description: 'CLEO 5 Plugin' + timestampUrl: 'http://timestamp.digicert.com' + folder: './cleo_plugins/.output' - name: Plugins - VirusTotal Scan uses: crazy-max/ghaction-virustotal@v4 with: vt_api_key: ${{ secrets.VT_KEY }} - files: | - ./cleo_plugins/.output/*.cleo + files: './cleo_plugins/.output/*.cleo' - name: Gather Output Files id: prepare_archive diff --git 
a/CLEO5.vcxproj b/CLEO5.vcxproj index 4f15205e..fd6b68af 100644 --- a/CLEO5.vcxproj +++ b/CLEO5.vcxproj @@ -46,6 +46,7 @@ + diff --git a/CLEO5.vcxproj.filters b/CLEO5.vcxproj.filters index 31b955c1..827563b3 100644 --- a/CLEO5.vcxproj.filters +++ b/CLEO5.vcxproj.filters @@ -87,12 +87,6 @@ source\core - - source\extensions - - - source\game_sa - source\utils @@ -108,6 +102,15 @@ plugin_sdk + + source\extensions + + + plugin_sdk + + + plugin_sdk + @@ -149,12 +152,6 @@ source\core - - source\extensions - - - source\game_sa - source\utils diff --git a/README.md b/README.md index fc26727d..0632c761 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ CLEO itself does not replace any game file, however the following files and fold - cleo\cleo_plugins\SA.DebugUtils.cleo (script debugging utilities plugin) - cleo\cleo_plugins\SA.FileSystemOperations.cleo (disk drive files related operations plugin) - cleo\cleo_plugins\SA.IniFiles.cleo (.ini config files handling plugin) -- cleo\cleo_plugins\SA.IntOperations.cleo (additional math operations plugin) +- cleo\cleo_plugins\SA.Math.cleo (additional math operations plugin) - cleo\cleo_plugins\SA.MemoryOperations.cleo (memory and .dll libraries utilities plugin) - cleo\cleo_plugins\SA.Text.cleo (text processing plugin) - cleo\cleo_saves\ (CLEO save directory) diff --git a/cleo_plugins/Text/CTextManager.cpp b/cleo_plugins/Text/CTextManager.cpp index 576932ed..5bb88ba5 100644 --- a/cleo_plugins/Text/CTextManager.cpp +++ b/cleo_plugins/Text/CTextManager.cpp @@ -101,16 +101,16 @@ namespace CLEO { try { - std::ifstream stream(list.paths[i]); + std::ifstream stream(list.strings[i]); auto result = ParseFxtFile(stream); - TRACE("Added %d new FXT entries from file %s", result, list.paths[i]); + TRACE("Added %d new FXT entries from file %s", result, list.strings[i]); } catch (std::exception& ex) { - LOG_WARNING(0, "Loading of FXT file '%s' failed: \n%s", list.paths[i], ex.what()); + LOG_WARNING(0, "Loading of FXT file '%s' failed: 
\n%s", list.strings[i], ex.what()); } } - CLEO::CLEO_ListDirectoryFree(list); + CLEO::CLEO_StringListFree(list); } void CTextManager::Clear() diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index 736c7e4c..19ec4f62 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -528,11 +528,12 @@ DWORD WINAPI CLEO_GetScriptTextureById(CRunningScript* thread, int id); // ret R DWORD WINAPI CLEO_GetInternalAudioStream(CRunningScript* thread, DWORD stream); // arg CAudioStream * +struct StringList { DWORD count; char** strings; }; +void WINAPI CLEO_StringListFree(StringList list); // releases resources used by StringList container + // Should be always used when working with files. Provides ModLoader compatibility void WINAPI CLEO_ResolvePath(CRunningScript* thread, char* inOutPath, DWORD pathMaxLen); // convert to absolute (file system) path -struct DirectoryList{ DWORD count; char** paths; }; -DirectoryList WINAPI CLEO_ListDirectory(CRunningScript* thread, const char* searchPath, BOOL listDirs, BOOL listFiles); // thread can be null, searchPath can contain wildcards. After use CLEO_ListDirectoryFree must be called on returned DirectoryList to free allocated resources -void WINAPI CLEO_ListDirectoryFree(DirectoryList list); // releases resources allocated by CLEO_ListDirectory +StringList WINAPI CLEO_ListDirectory(CRunningScript* thread, const char* searchPath, BOOL listDirs, BOOL listFiles); // thread can be null, searchPath can contain wildcards. 
After use CLEO_StringListFree must be called on returned StringList to free its resources void WINAPI CLEO_Log(eLogLevel level, const char* msg); // add message to log diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 105292e3..70837aa4 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -215,47 +215,6 @@ namespace CLEO ScmFunction::Clear(); } - CCustomOpcodeSystem::CCustomOpcodeSystem() - { - TRACE("Initializing CLEO core opcodes..."); - - CLEO_RegisterOpcode(0x0051, opcode_0051); - CLEO_RegisterOpcode(0x0417, opcode_0417); - CLEO_RegisterOpcode(0x0A92, opcode_0A92); - CLEO_RegisterOpcode(0x0A93, opcode_0A93); - CLEO_RegisterOpcode(0x0A94, opcode_0A94); - CLEO_RegisterOpcode(0x0A95, opcode_0A95); - CLEO_RegisterOpcode(0x0AA0, opcode_0AA0); - CLEO_RegisterOpcode(0x0AA1, opcode_0AA1); - CLEO_RegisterOpcode(0x0AA9, opcode_0AA9); - CLEO_RegisterOpcode(0x0AB0, opcode_0AB0); - CLEO_RegisterOpcode(0x0AB1, opcode_0AB1); - CLEO_RegisterOpcode(0x0AB2, opcode_0AB2); - CLEO_RegisterOpcode(0x0AB3, opcode_0AB3); - CLEO_RegisterOpcode(0x0AB4, opcode_0AB4); - CLEO_RegisterOpcode(0x0AB5, opcode_0AB5); - CLEO_RegisterOpcode(0x0AB6, opcode_0AB6); - CLEO_RegisterOpcode(0x0AB7, opcode_0AB7); - CLEO_RegisterOpcode(0x0AB8, opcode_0AB8); - CLEO_RegisterOpcode(0x0ABA, opcode_0ABA); - CLEO_RegisterOpcode(0x0ABD, opcode_0ABD); - CLEO_RegisterOpcode(0x0ABE, opcode_0ABE); - CLEO_RegisterOpcode(0x0ABF, opcode_0ABF); - CLEO_RegisterOpcode(0x0AD2, opcode_0AD2); - CLEO_RegisterOpcode(0x0ADC, opcode_0ADC); - CLEO_RegisterOpcode(0x0ADD, opcode_0ADD); - CLEO_RegisterOpcode(0x0AE1, opcode_0AE1); - CLEO_RegisterOpcode(0x0AE2, opcode_0AE2); - CLEO_RegisterOpcode(0x0AE3, opcode_0AE3); - - CLEO_RegisterOpcode(0x0DD5, opcode_0DD5); // get_platform - - CLEO_RegisterOpcode(0x2000, opcode_2000); // get_cleo_arg_count - // 2001 free - CLEO_RegisterOpcode(0x2002, opcode_2002); // cleo_return_with - CLEO_RegisterOpcode(0x2003, opcode_2003); // 
cleo_return_fail - } - void CCustomOpcodeSystem::Inject(CCodeInjector& inj) { TRACE("Injecting CustomOpcodeSystem..."); @@ -298,6 +257,51 @@ namespace CLEO } } + void CCustomOpcodeSystem::Init() + { + if (initialized) return; + + TRACE("Initializing CLEO core opcodes..."); + + CLEO_RegisterOpcode(0x0051, opcode_0051); + CLEO_RegisterOpcode(0x0417, opcode_0417); + CLEO_RegisterOpcode(0x0A92, opcode_0A92); + CLEO_RegisterOpcode(0x0A93, opcode_0A93); + CLEO_RegisterOpcode(0x0A94, opcode_0A94); + CLEO_RegisterOpcode(0x0A95, opcode_0A95); + CLEO_RegisterOpcode(0x0AA0, opcode_0AA0); + CLEO_RegisterOpcode(0x0AA1, opcode_0AA1); + CLEO_RegisterOpcode(0x0AA9, opcode_0AA9); + CLEO_RegisterOpcode(0x0AB0, opcode_0AB0); + CLEO_RegisterOpcode(0x0AB1, opcode_0AB1); + CLEO_RegisterOpcode(0x0AB2, opcode_0AB2); + CLEO_RegisterOpcode(0x0AB3, opcode_0AB3); + CLEO_RegisterOpcode(0x0AB4, opcode_0AB4); + CLEO_RegisterOpcode(0x0AB5, opcode_0AB5); + CLEO_RegisterOpcode(0x0AB6, opcode_0AB6); + CLEO_RegisterOpcode(0x0AB7, opcode_0AB7); + CLEO_RegisterOpcode(0x0AB8, opcode_0AB8); + CLEO_RegisterOpcode(0x0ABA, opcode_0ABA); + CLEO_RegisterOpcode(0x0ABD, opcode_0ABD); + CLEO_RegisterOpcode(0x0ABE, opcode_0ABE); + CLEO_RegisterOpcode(0x0ABF, opcode_0ABF); + CLEO_RegisterOpcode(0x0AD2, opcode_0AD2); + CLEO_RegisterOpcode(0x0ADC, opcode_0ADC); + CLEO_RegisterOpcode(0x0ADD, opcode_0ADD); + CLEO_RegisterOpcode(0x0AE1, opcode_0AE1); + CLEO_RegisterOpcode(0x0AE2, opcode_0AE2); + CLEO_RegisterOpcode(0x0AE3, opcode_0AE3); + + CLEO_RegisterOpcode(0x0DD5, opcode_0DD5); // get_platform + + CLEO_RegisterOpcode(0x2000, opcode_2000); // get_cleo_arg_count + // 2001 free + CLEO_RegisterOpcode(0x2002, opcode_2002); // cleo_return_with + CLEO_RegisterOpcode(0x2003, opcode_2003); // cleo_return_fail + + initialized = true; + } + CCustomOpcodeSystem::_OpcodeHandler CCustomOpcodeSystem::originalOpcodeHandlers[OriginalOpcodeHandlersCount]; CCustomOpcodeSystem::_OpcodeHandler 
CCustomOpcodeSystem::customOpcodeHandlers[CustomOpcodeHandlersCount]; CustomOpcodeHandler CCustomOpcodeSystem::customOpcodeProc[LastCustomOpcode + 1]; @@ -1941,11 +1945,24 @@ extern "C" std::memcpy(inOutPath, resolved.c_str(), resolved.length() + 1); // with terminator } - DirectoryList WINAPI CLEO_ListDirectory(CLEO::CRunningScript* thread, const char* searchPath, BOOL listDirs, BOOL listFiles) + void WINAPI CLEO_StringListFree(StringList list) { - DirectoryList result; + if (list.count > 0 && list.strings != nullptr) + { + for (DWORD i = 0; i < list.count; i++) + { + free(list.strings[i]); + } + + free(list.strings); + } + } + + StringList WINAPI CLEO_ListDirectory(CLEO::CRunningScript* thread, const char* searchPath, BOOL listDirs, BOOL listFiles) + { + StringList result; result.count = 0; - result.paths = nullptr; + result.strings = nullptr; if (searchPath == nullptr) { @@ -1957,9 +1974,6 @@ extern "C" return result; // nothing to list, done } - // TODO: if available call ModLoader here instead - // scriptFileDir, scriptWorkDir, searchPath - auto fsSearchPath = FS::path(searchPath); if (!fsSearchPath.is_absolute()) { @@ -1974,7 +1988,6 @@ extern "C" HANDLE hSearch = FindFirstFile(searchPath, &wfd); if (hSearch == INVALID_HANDLE_VALUE) { - TRACE("No files found in: %s", searchPath); return result; } @@ -2000,30 +2013,30 @@ extern "C" while (FindNextFile(hSearch, &wfd)); // create results list - result.paths = (char**)malloc(found.size() * sizeof(DWORD)); // array of pointers + result.strings = (char**)malloc(found.size() * sizeof(DWORD)); // array of pointers for(auto& path : found) { char* str = (char*)malloc(path.length() + 1); strcpy(str, path.c_str()); - result.paths[result.count] = str; + result.strings[result.count] = str; result.count++; } return result; } - void WINAPI CLEO_ListDirectoryFree(DirectoryList list) + void WINAPI CLEO_ListDirectoryFree(StringList list) { - if (list.count > 0 && list.paths != nullptr) + if (list.count > 0 && list.strings != 
nullptr) { for (DWORD i = 0; i < list.count; i++) { - free(list.paths[i]); + free(list.strings[i]); } - free(list.paths); + free(list.strings); } } diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index ee77a6cf..57943dc5 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -32,8 +32,9 @@ namespace CLEO void FinalizeScriptObjects(); - CCustomOpcodeSystem(); + CCustomOpcodeSystem() = default; virtual void Inject(CCodeInjector& inj); + void Init(); ~CCustomOpcodeSystem() { TRACE("Last opcode executed: %04X", lastOpcode); @@ -48,6 +49,8 @@ namespace CLEO static OpcodeResult __stdcall opcode_0417(CRunningScript* thread); // load_and_launch_mission_internal private: + bool initialized = false; + typedef OpcodeResult(__thiscall* _OpcodeHandler)(CRunningScript* thread, WORD opcode); static const size_t OriginalOpcodeHandlersCount = (LastOriginalOpcode / 100) + 1; // 100 opcodes peer handler diff --git a/source/CPluginSystem.cpp b/source/CPluginSystem.cpp new file mode 100644 index 00000000..687e9467 --- /dev/null +++ b/source/CPluginSystem.cpp @@ -0,0 +1,83 @@ +#include "stdafx.h" +#include "CPluginSystem.h" +#include "CleoBase.h" + + +using namespace CLEO; + +CPluginSystem::~CPluginSystem() +{ + std::for_each(plugins.begin(), plugins.end(), FreeLibrary); +} + +void CPluginSystem::LoadPlugins() +{ + if (pluginsLoaded) return; // already done + + std::set names; + std::vector filenames; + + // load plugins from main CLEO directory + auto ScanPluginsDir = [&](std::string path, const std::string prefix, const std::string extension) + { + auto pattern = path + '\\' + prefix + '*' + extension; + auto files = CLEO_ListDirectory(nullptr, pattern.c_str(), false, true); + + for (DWORD i = 0; i < files.count; i++) + { + auto name = FS::path(files.strings[i]).filename().string(); + name = name.substr(prefix.length()); // cut off prefix + name.resize(name.length() - extension.length()); // cut off extension + 
std::transform(name.begin(), name.end(), name.begin(), [](unsigned char c) { return std::toupper(c); }); + + if (names.find(name) == names.end()) + { + names.insert(name); + filenames.emplace_back(files.strings[i]); + TRACE(" - '%s'", files.strings[i]); + } + else + { + LOG_WARNING(0, " - '%s' skipped, duplicate of `%s` plugin.", files.strings[i], name.c_str()); + } + } + + CLEO_StringListFree(files); + }; + + TRACE("Listing CLEO plugins:"); + ScanPluginsDir(FS::path(Filepath_Cleo).append("cleo_plugins").string(), "SA.", ".cleo"); + ScanPluginsDir(FS::path(Filepath_Cleo).append("cleo_plugins").string(), "", ".cleo"); // legacy plugins in new location + ScanPluginsDir(Filepath_Cleo, "", ".cleo"); // legacy plugins in old location + + // reverse order, so opcodes from CLEO5 plugins can overwrite opcodes from legacy plugins + if (!filenames.empty()) + { + for (auto it = filenames.crbegin(); it < filenames.crend(); it++) + { + const auto filename = it->c_str(); + TRACE("Loading plugin '%s'", filename); + + HMODULE hlib = LoadLibrary(filename); + if (!hlib) + { + LOG_WARNING(0, "Error loading plugin '%s'", filename); + continue; + } + + plugins.push_back(hlib); + } + } + else + { + TRACE(" - nothing found"); + } + + pluginsLoaded = true; +} + +size_t CPluginSystem::GetNumPlugins() const +{ + return plugins.size(); +} + diff --git a/source/CPluginSystem.h b/source/CPluginSystem.h index e9fe7fa8..7c427d5a 100644 --- a/source/CPluginSystem.h +++ b/source/CPluginSystem.h @@ -12,67 +12,13 @@ namespace CLEO class CPluginSystem { std::list plugins; + bool pluginsLoaded = false; public: - CPluginSystem() - { - std::set loaded; - auto LoadPluginsDir = [&](std::string path, std::string prefix, std::string extension) - { - std::set> filesWithPrefix; - std::set> filesWithoutPrefix; + CPluginSystem() = default; + ~CPluginSystem(); - FilesWalk(path.c_str(), extension.c_str(), [&](const char* fullPath, const char* filename) - { - std::string name = filename; - 
name.resize(name.length() - extension.length()); // cut off file type - std::transform(name.begin(), name.end(), name.begin(), [](unsigned char c) { return std::tolower(c); }); - - if (_strnicmp(name.c_str(), prefix.c_str(), prefix.length()) == 0) - { - filesWithPrefix.insert({ fullPath, name.c_str() + prefix.length() }); - } - else - { - filesWithoutPrefix.insert({ fullPath, name }); - } - }); - - auto loadLib = [&](const char* fullPath, const char* name) - { - if (loaded.find(name) != loaded.end()) - { - LOG_WARNING(0, "Plugin `%s` already loaded. Skipping '%s'", name, fullPath); - return; - } - - TRACE("Loading plugin '%s'", fullPath); - HMODULE hlib = LoadLibrary(fullPath); - if (!hlib) - { - LOG_WARNING(0, "Error loading plugin '%s'", fullPath); - return; - } - - loaded.insert(name); - plugins.push_back(hlib); - }; - - // load with prefix first - for (const auto& entry : filesWithPrefix) loadLib(entry.first.c_str(), entry.second.c_str()); - for (const auto& entry : filesWithoutPrefix) loadLib(entry.first.c_str(), entry.second.c_str()); - }; - - TRACE("Loading plugins..."); - LoadPluginsDir(FS::path(Filepath_Cleo).append("cleo_plugins").string(), "SA.", ".cleo"); // prioritize with prefix - LoadPluginsDir(Filepath_Cleo.c_str(), "SA.", ".cleo"); // legacy plugins location - } - - ~CPluginSystem() - { - std::for_each(plugins.begin(), plugins.end(), FreeLibrary); - } - - inline size_t GetNumPlugins() { return plugins.size(); } + void LoadPlugins(); + size_t GetNumPlugins() const; }; } diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 2016096f..91e6be96 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -996,36 +996,51 @@ namespace CLEO void CScriptEngine::LoadCustomScripts() { - TRACE("Searching for CLEO scripts"); - FilesWalk(Filepath_Cleo.c_str(), cs_ext, [&](const char* fullPath, const char* filename) - { - if (auto cs = LoadScript(fullPath)) - { - cs->SetDebugMode(NativeScriptsDebugMode); // inherit from global state - 
} - }); + TRACE("Listing CLEO scripts:"); + + std::set found; - FilesWalk(Filepath_Cleo.c_str(), cs4_ext, [&](const char* fullPath, const char* filename) + auto processFileList = [&](StringList fileList) { - if (auto cs = LoadScript(fullPath)) + for (DWORD i = 0; i < fileList.count; i++) { - cs->SetCompatibility(CLEO_VER_4); - cs->SetDebugMode(NativeScriptsDebugMode); // inherit from global state + const auto ext = FS::path(fileList.strings[i]).extension(); + if (ext == cs_ext || ext == cs3_ext || ext == cs4_ext) + { + TRACE(" - '%s'", fileList.strings[i]); + found.emplace(fileList.strings[i]); + } } - }); + }; + + auto searchPattern = Filepath_Cleo + "\\*.*"; + auto list = CLEO_ListDirectory(nullptr, searchPattern.c_str(), false, true); + processFileList(list); + CLEO_StringListFree(list); - FilesWalk(Filepath_Cleo.c_str(), cs3_ext, [&](const char* fullPath, const char* filename) + if (!found.empty()) { - if (auto cs = LoadScript(fullPath)) + TRACE("Starting CLEO scripts"); + + for (const auto& path : found) { - cs->SetCompatibility(CLEO_VER_3); - cs->SetDebugMode(NativeScriptsDebugMode); // inherit from global state + if (auto cs = LoadScript(path.c_str())) + { + cs->SetDebugMode(NativeScriptsDebugMode); // inherit from global state + + // compatibility modes + const auto ext = FS::path(path).extension(); + if (ext == cs4_ext) + cs->SetCompatibility(CLEO_VER_4); + else if (ext == cs3_ext) + cs->SetCompatibility(CLEO_VER_3); + } } - }); - - GetInstance().CallCallbacks(eCallbackId::ScriptsLoaded); - - TRACE("Scripts search done"); + } + else + { + TRACE(" - nothing found"); + } } CCustomScript * CScriptEngine::LoadScript(const char * szFilePath) diff --git a/source/CleoBase.cpp b/source/CleoBase.cpp index ac794e7f..28ac9b8f 100644 --- a/source/CleoBase.cpp +++ b/source/CleoBase.cpp @@ -145,7 +145,8 @@ namespace CLEO CodeInjector.ReplaceFunction(OnDrawingFinished, 0x00734640); // nullsub_63 - originally something like renderDebugStuff? 
- TRACE("CLEO instance started successfully!"); + OpcodeSystem.Init(); + PluginSystem.LoadPlugins(); } void CCleoInstance::Stop() diff --git a/source/FileEnumerator.h b/source/FileEnumerator.h index 59593ce3..96bde18b 100644 --- a/source/FileEnumerator.h +++ b/source/FileEnumerator.h @@ -1,20 +1,18 @@ #pragma once #include "..\cleo_sdk\CLEO.h" -#include template void FilesWalk(const char* directory, const char* extension, T callback) { - std::string searchPath = directory; - if (searchPath.back() != '\\' && searchPath.back() != '/') searchPath.push_back('\\'); - searchPath += "*"; - searchPath += extension; + auto filePattern = std::string(directory); + filePattern += "\\*"; + filePattern += extension; - auto list = CLEO::CLEO_ListDirectory(nullptr, searchPath.c_str(), false, true); + auto list = CLEO::CLEO_ListDirectory(nullptr, filePattern.c_str(), false, true); for (DWORD i = 0; i < list.count; i++) { - auto fsPath = FS::path(list.paths[i]); - callback(list.paths[i], fsPath.filename().string().c_str()); + auto fsPath = FS::path(list.strings[i]); + callback(list.strings[i], fsPath.filename().string().c_str()); } - CLEO::CLEO_ListDirectoryFree(list); + CLEO::CLEO_StringListFree(list); } diff --git a/source/cleo.def b/source/cleo.def index 1c1f8bbc..93517774 100644 --- a/source/cleo.def +++ b/source/cleo.def @@ -36,7 +36,7 @@ EXPORTS _CLEO_GetScriptParamInfoStr@12 @33 _CLEO_ResolvePath@12 @34 _CLEO_ListDirectory@16 @35 - _CLEO_ListDirectoryFree@8 @36 + _CLEO_StringListFree@8 @36 _CLEO_GetScriptDebugMode@4 @37 _CLEO_SetScriptDebugMode@8 @38 _CLEO_Log@8 @39 From 1caca4be2afce98433366f9272900108808783e7 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sat, 15 Jun 2024 17:23:12 +0200 Subject: [PATCH 164/216] Fixed twice listing plugin files. 
(#153) --- source/CPluginSystem.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/source/CPluginSystem.cpp b/source/CPluginSystem.cpp index 687e9467..fb69e735 100644 --- a/source/CPluginSystem.cpp +++ b/source/CPluginSystem.cpp @@ -25,12 +25,19 @@ void CPluginSystem::LoadPlugins() for (DWORD i = 0; i < files.count; i++) { + if (std::find(filenames.begin(), filenames.end(), files.strings[i]) != filenames.end()) + continue; // file already listed + auto name = FS::path(files.strings[i]).filename().string(); name = name.substr(prefix.length()); // cut off prefix name.resize(name.length() - extension.length()); // cut off extension - std::transform(name.begin(), name.end(), name.begin(), [](unsigned char c) { return std::toupper(c); }); - if (names.find(name) == names.end()) + // case insensitive search in already listed plugin names + auto found = std::find_if(names.begin(), names.end(), [&](const std::string& s) { + return _stricmp(s.c_str(), name.c_str()) == 0; + }); + + if (found == names.end()) { names.insert(name); filenames.emplace_back(files.strings[i]); @@ -38,7 +45,7 @@ void CPluginSystem::LoadPlugins() } else { - LOG_WARNING(0, " - '%s' skipped, duplicate of `%s` plugin.", files.strings[i], name.c_str()); + LOG_WARNING(0, " - '%s' skipped, duplicate of `%s` plugin", files.strings[i], name.c_str()); } } @@ -53,7 +60,7 @@ void CPluginSystem::LoadPlugins() // reverse order, so opcodes from CLEO5 plugins can overwrite opcodes from legacy plugins if (!filenames.empty()) { - for (auto it = filenames.crbegin(); it < filenames.crend(); it++) + for (auto it = filenames.crbegin(); it != filenames.crend(); it++) { const auto filename = it->c_str(); TRACE("Loading plugin '%s'", filename); From 0da5208621c39eaa657881f8c6cf38e00271cc5a Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sat, 15 Jun 2024 22:32:45 +0200 Subject: [PATCH 165/216] Unrelated exports moved out from CCustomOpcodeSystem class. 
(#154) Removed leftover CLEO_ListDirectoryFree function. --- source/CCustomOpcodeSystem.cpp | 115 --------------------------------- source/CleoBase.cpp | 101 +++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+), 115 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 70837aa4..c652e334 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -1864,11 +1864,6 @@ extern "C" return texture; } - DWORD WINAPI CLEO_GetInternalAudioStream(CLEO::CRunningScript* thread, DWORD stream) // arg CAudioStream * - { - return stream; // CAudioStream::streamInternal offset is 0 - } - CLEO::CRunningScript* WINAPI CLEO_CreateCustomScript(CLEO::CRunningScript* fromThread, const char *script_name, int label) { auto filename = reinterpret_cast(fromThread)->ResolvePath(script_name, DIR_CLEO); // legacy: default search location is game\cleo directory @@ -1930,116 +1925,6 @@ extern "C" scriptDeleteDelegate -= func; } - void WINAPI CLEO_ResolvePath(CLEO::CRunningScript* thread, char* inOutPath, DWORD pathMaxLen) - { - if (thread == nullptr || inOutPath == nullptr || pathMaxLen < 2) - { - return; // invalid param - } - - auto resolved = reinterpret_cast(thread)->ResolvePath(inOutPath); - - if (resolved.length() >= pathMaxLen) - resolved.resize(pathMaxLen - 1); // and terminator character - - std::memcpy(inOutPath, resolved.c_str(), resolved.length() + 1); // with terminator - } - - void WINAPI CLEO_StringListFree(StringList list) - { - if (list.count > 0 && list.strings != nullptr) - { - for (DWORD i = 0; i < list.count; i++) - { - free(list.strings[i]); - } - - free(list.strings); - } - } - - StringList WINAPI CLEO_ListDirectory(CLEO::CRunningScript* thread, const char* searchPath, BOOL listDirs, BOOL listFiles) - { - StringList result; - result.count = 0; - result.strings = nullptr; - - if (searchPath == nullptr) - { - return result; // invalid param - } - - if (!listDirs && !listFiles) - { - return result; 
// nothing to list, done - } - - auto fsSearchPath = FS::path(searchPath); - if (!fsSearchPath.is_absolute()) - { - auto workDir = (thread != nullptr) ? - ((CCustomScript*)thread)->GetWorkDir() : - Filepath_Root.c_str(); - - fsSearchPath = workDir / fsSearchPath; - } - - WIN32_FIND_DATA wfd = { 0 }; - HANDLE hSearch = FindFirstFile(searchPath, &wfd); - if (hSearch == INVALID_HANDLE_VALUE) - { - return result; - } - - std::set found; - do - { - if (!listDirs && (wfd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) - { - continue; // skip directories - } - - if (!listFiles && !(wfd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) - { - continue; // skip files - } - - auto path = FS::path(wfd.cFileName); - if (!path.is_absolute()) // keep absolute in case somebody hooked the APIs to return so - path = fsSearchPath.parent_path() / path; - - found.insert(path.string()); - } - while (FindNextFile(hSearch, &wfd)); - - // create results list - result.strings = (char**)malloc(found.size() * sizeof(DWORD)); // array of pointers - - for(auto& path : found) - { - char* str = (char*)malloc(path.length() + 1); - strcpy(str, path.c_str()); - - result.strings[result.count] = str; - result.count++; - } - - return result; - } - - void WINAPI CLEO_ListDirectoryFree(StringList list) - { - if (list.count > 0 && list.strings != nullptr) - { - for (DWORD i = 0; i < list.count; i++) - { - free(list.strings[i]); - } - - free(list.strings); - } - } - BOOL WINAPI CLEO_GetScriptDebugMode(const CLEO::CRunningScript* thread) { return reinterpret_cast(thread)->GetDebugMode(); diff --git a/source/CleoBase.cpp b/source/CleoBase.cpp index 28ac9b8f..09b5b423 100644 --- a/source/CleoBase.cpp +++ b/source/CleoBase.cpp @@ -220,5 +220,106 @@ namespace CLEO { GetInstance().CallCallbacks(eCallbackId::DrawingFinished); // execute registered callbacks } + + DWORD WINAPI CLEO_GetInternalAudioStream(CLEO::CRunningScript* thread, DWORD stream) // arg CAudioStream * + { + return stream; // 
CAudioStream::streamInternal offset is 0 + } + + void WINAPI CLEO_ResolvePath(CLEO::CRunningScript* thread, char* inOutPath, DWORD pathMaxLen) + { + if (thread == nullptr || inOutPath == nullptr || pathMaxLen < 2) + { + return; // invalid param + } + + auto resolved = reinterpret_cast(thread)->ResolvePath(inOutPath); + + if (resolved.length() >= pathMaxLen) + resolved.resize(pathMaxLen - 1); // and terminator character + + std::memcpy(inOutPath, resolved.c_str(), resolved.length() + 1); // with terminator + } + + void WINAPI CLEO_StringListFree(StringList list) + { + if (list.count > 0 && list.strings != nullptr) + { + for (DWORD i = 0; i < list.count; i++) + { + free(list.strings[i]); + } + + free(list.strings); + } + } + + StringList WINAPI CLEO_ListDirectory(CLEO::CRunningScript* thread, const char* searchPath, BOOL listDirs, BOOL listFiles) + { + StringList result; + result.count = 0; + result.strings = nullptr; + + if (searchPath == nullptr) + { + return result; // invalid param + } + + if (!listDirs && !listFiles) + { + return result; // nothing to list, done + } + + auto fsSearchPath = FS::path(searchPath); + if (!fsSearchPath.is_absolute()) + { + auto workDir = (thread != nullptr) ? 
+ ((CCustomScript*)thread)->GetWorkDir() : + Filepath_Root.c_str(); + + fsSearchPath = workDir / fsSearchPath; + } + + WIN32_FIND_DATA wfd = { 0 }; + HANDLE hSearch = FindFirstFile(searchPath, &wfd); + if (hSearch == INVALID_HANDLE_VALUE) + { + return result; + } + + std::set found; + do + { + if (!listDirs && (wfd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) + { + continue; // skip directories + } + + if (!listFiles && !(wfd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) + { + continue; // skip files + } + + auto path = FS::path(wfd.cFileName); + if (!path.is_absolute()) // keep absolute in case somebody hooked the APIs to return so + path = fsSearchPath.parent_path() / path; + + found.insert(path.string()); + } while (FindNextFile(hSearch, &wfd)); + + // create results list + result.strings = (char**)malloc(found.size() * sizeof(DWORD)); // array of pointers + + for (auto& path : found) + { + char* str = (char*)malloc(path.length() + 1); + strcpy(str, path.c_str()); + + result.strings[result.count] = str; + result.count++; + } + + return result; + } } From ddbc910a59d4e9f010f2534144d9dcbb24ed2a8d Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sun, 16 Jun 2024 18:56:04 +0200 Subject: [PATCH 166/216] Fixed listing already skipped duplicate files. Added mission FindFirstFile handle closing. StringList creation added as utils function. 
--- cleo_sdk/CLEO_Utils.h | 24 ++++++++++++++++++++++++ source/CPluginSystem.cpp | 15 ++++++++++----- source/CleoBase.cpp | 38 +++++++------------------------------- 3 files changed, 41 insertions(+), 36 deletions(-) diff --git a/cleo_sdk/CLEO_Utils.h b/cleo_sdk/CLEO_Utils.h index 9e20b316..bf88ce52 100644 --- a/cleo_sdk/CLEO_Utils.h +++ b/cleo_sdk/CLEO_Utils.h @@ -245,6 +245,30 @@ namespace CLEO return (void*)original; } + template + static StringList CreateStringList(const T& container) + { + StringList result; + result.count = 0; + result.strings = nullptr; + + if (container.size() > 0) + { + result.strings = (char**)malloc(container.size() * sizeof(DWORD)); // array of pointers + for (const std::string& s : container) + { + auto size = s.length() + 1; // and terminator character + auto str = (char*)malloc(size); + memcpy(str, s.c_str(), size); + + result.strings[result.count] = str; + result.count++; + } + } + + return result; + } + #define TRACE(format,...) {CLEO::Trace(CLEO::eLogLevel::Default, format, __VA_ARGS__);} #define LOG_WARNING(script, format, ...) {CLEO::Trace(script, CLEO::eLogLevel::Error, format, __VA_ARGS__);} #define SHOW_ERROR(a,...) 
{CLEO::ShowError(a, __VA_ARGS__);} diff --git a/source/CPluginSystem.cpp b/source/CPluginSystem.cpp index fb69e735..97b1af09 100644 --- a/source/CPluginSystem.cpp +++ b/source/CPluginSystem.cpp @@ -15,7 +15,8 @@ void CPluginSystem::LoadPlugins() if (pluginsLoaded) return; // already done std::set names; - std::vector filenames; + std::vector paths; + std::set skippedPaths; // load plugins from main CLEO directory auto ScanPluginsDir = [&](std::string path, const std::string prefix, const std::string extension) @@ -25,9 +26,12 @@ void CPluginSystem::LoadPlugins() for (DWORD i = 0; i < files.count; i++) { - if (std::find(filenames.begin(), filenames.end(), files.strings[i]) != filenames.end()) + if (std::find(paths.begin(), paths.end(), files.strings[i]) != paths.end()) continue; // file already listed + if (skippedPaths.find(files.strings[i]) != skippedPaths.end()) + continue; // file already skipped + auto name = FS::path(files.strings[i]).filename().string(); name = name.substr(prefix.length()); // cut off prefix name.resize(name.length() - extension.length()); // cut off extension @@ -40,11 +44,12 @@ void CPluginSystem::LoadPlugins() if (found == names.end()) { names.insert(name); - filenames.emplace_back(files.strings[i]); + paths.emplace_back(files.strings[i]); TRACE(" - '%s'", files.strings[i]); } else { + skippedPaths.emplace(files.strings[i]); LOG_WARNING(0, " - '%s' skipped, duplicate of `%s` plugin", files.strings[i], name.c_str()); } } @@ -58,9 +63,9 @@ void CPluginSystem::LoadPlugins() ScanPluginsDir(Filepath_Cleo, "", ".cleo"); // legacy plugins in old location // reverse order, so opcodes from CLEO5 plugins can overwrite opcodes from legacy plugins - if (!filenames.empty()) + if (!paths.empty()) { - for (auto it = filenames.crbegin(); it != filenames.crend(); it++) + for (auto it = paths.crbegin(); it != paths.crend(); it++) { const auto filename = it->c_str(); TRACE("Loading plugin '%s'", filename); diff --git a/source/CleoBase.cpp 
b/source/CleoBase.cpp index 09b5b423..48f6957e 100644 --- a/source/CleoBase.cpp +++ b/source/CleoBase.cpp @@ -256,19 +256,11 @@ namespace CLEO StringList WINAPI CLEO_ListDirectory(CLEO::CRunningScript* thread, const char* searchPath, BOOL listDirs, BOOL listFiles) { - StringList result; - result.count = 0; - result.strings = nullptr; - if (searchPath == nullptr) - { - return result; // invalid param - } + return {}; // invalid param if (!listDirs && !listFiles) - { - return result; // nothing to list, done - } + return {}; // nothing to list, done auto fsSearchPath = FS::path(searchPath); if (!fsSearchPath.is_absolute()) @@ -281,24 +273,18 @@ namespace CLEO } WIN32_FIND_DATA wfd = { 0 }; - HANDLE hSearch = FindFirstFile(searchPath, &wfd); + HANDLE hSearch = FindFirstFile(fsSearchPath.string().c_str(), &wfd); if (hSearch == INVALID_HANDLE_VALUE) - { - return result; - } + return {}; // nothing found std::set found; do { - if (!listDirs && (wfd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) - { + if (!listDirs && (wfd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) continue; // skip directories - } if (!listFiles && !(wfd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) - { continue; // skip files - } auto path = FS::path(wfd.cFileName); if (!path.is_absolute()) // keep absolute in case somebody hooked the APIs to return so @@ -307,19 +293,9 @@ namespace CLEO found.insert(path.string()); } while (FindNextFile(hSearch, &wfd)); - // create results list - result.strings = (char**)malloc(found.size() * sizeof(DWORD)); // array of pointers - - for (auto& path : found) - { - char* str = (char*)malloc(path.length() + 1); - strcpy(str, path.c_str()); - - result.strings[result.count] = str; - result.count++; - } + FindClose(hSearch); - return result; + return CreateStringList(found); } } From 7a93a1581c9459a1e095148864be1057f09eebad Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sat, 22 Jun 2024 22:21:26 +0200 Subject: [PATCH 167/216] Enabled CLEO_GetScriptFilename to return info 
from inactive threads. (#157) Enabled CLEO_GetScriptFilename to work with inactive scripts 0ABA moved to memory plugin, fixed to stop all threads with given name Added terminate_script ptr opcode Added SDK exports --- CHANGELOG.md | 3 + .../MemoryOperations/MemoryOperations.cpp | 40 +++++++++++- cleo_sdk/CLEO.h | 2 + source/CCustomOpcodeSystem.cpp | 26 ++++---- source/CScriptEngine.cpp | 61 +++++++++++++++---- source/CScriptEngine.h | 4 +- source/cleo.def | 2 + 7 files changed, 108 insertions(+), 30 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index be616186..02bdc1e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,6 +55,7 @@ - new opcode **2405 ([is_script_running](https://library.sannybuilder.com/#/sa/memory/2405))** - new opcode **2406 ([get_script_struct_from_filename](https://library.sannybuilder.com/#/sa/memory/2406))** - new opcode **2407 ([is_memory_equal](https://library.sannybuilder.com/#/sa/memory/2407))** + - new opcode **2408 ([terminate_script](https://library.sannybuilder.com/#/sa/memory/2408))** - new [Text](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/Text) plugin - text related opcodes moved from CLEO core into separated plugin - new opcode **2600 ([is_text_empty](https://library.sannybuilder.com/#/sa/text/2600))** @@ -115,6 +116,8 @@ - CLEO_ReadStringParamWriteBuffer - CLEO_GetOpcodeParamsArray - CLEO_GetParamsHandledCount + - CLEO_IsScriptRunning + - CLEO_TerminateScript - CLEO_GetScriptVersion - CLEO_GetScriptInfoStr - CLEO_GetScriptFilename diff --git a/cleo_plugins/MemoryOperations/MemoryOperations.cpp b/cleo_plugins/MemoryOperations/MemoryOperations.cpp index 8d440bd6..444c82bb 100644 --- a/cleo_plugins/MemoryOperations/MemoryOperations.cpp +++ b/cleo_plugins/MemoryOperations/MemoryOperations.cpp @@ -44,6 +44,8 @@ class MemoryOperations CLEO_RegisterOpcode(0x0AAA, opcode_0AAA); // get_script_struct_named + CLEO_RegisterOpcode(0x0ABA, opcode_0ABA); // terminate_all_custom_scripts_with_this_name + 
CLEO_RegisterOpcode(0x0AC6, opcode_0AC6); // get_label_pointer CLEO_RegisterOpcode(0x0AC7, opcode_0AC7); // get_var_pointer CLEO_RegisterOpcode(0x0AC8, opcode_0AC8); // allocate_memory @@ -62,6 +64,7 @@ class MemoryOperations CLEO_RegisterOpcode(0x2405, opcode_2405); // is_script_running CLEO_RegisterOpcode(0x2406, opcode_2406); // get_script_struct_from_filename CLEO_RegisterOpcode(0x2407, opcode_2407); // is_memory_equal + CLEO_RegisterOpcode(0x2408, opcode_2408); // terminate_script // register event callbacks @@ -498,6 +501,27 @@ class MemoryOperations return OR_CONTINUE; } + //0ABA=1,terminate_all_custom_scripts_with_this_name %1d% + static OpcodeResult __stdcall opcode_0ABA(CLEO::CRunningScript* thread) + { + OPCODE_READ_PARAM_STRING(threadName); + + bool terminateCurrent = false; + while (true) + { + auto found = CLEO_GetScriptByName(threadName, false, true, 0); + if (found == nullptr) + break; + + if (found == thread) + terminateCurrent = true; + + CLEO_TerminateScript(found); + } + + return terminateCurrent ? 
OR_INTERRUPT : OR_CONTINUE; + } + //0AC6=2,get_label_pointer %1d% store_to %2d% static OpcodeResult __stdcall opcode_0AC6(CLEO::CRunningScript* thread) { @@ -798,11 +822,11 @@ class MemoryOperations //2405=1, is_script_running %1d% static OpcodeResult __stdcall opcode_2405(CLEO::CScriptThread* thread) { - auto address = (CLEO::CScriptThread*)OPCODE_READ_PARAM_INT(); + auto address = (CLEO::CScriptThread*)OPCODE_READ_PARAM_INT(); // allow invalid pointers too - auto name = CLEO_GetScriptFilename(address); + auto running = CLEO_IsScriptRunning(address); - OPCODE_CONDITION_RESULT(name != nullptr); + OPCODE_CONDITION_RESULT(running); return OR_CONTINUE; } @@ -841,6 +865,16 @@ class MemoryOperations OPCODE_CONDITION_RESULT(result == 0); return OR_CONTINUE; } + + //2408=1,terminate_script %1d% + static OpcodeResult __stdcall opcode_2408(CLEO::CScriptThread* thread) + { + auto address = (CLEO::CScriptThread*)OPCODE_READ_PARAM_PTR(); + + CLEO_TerminateScript(address); + + return (address == thread) ? 
OR_INTERRUPT : OR_CONTINUE; + } } Memory; std::set MemoryOperations::m_allocations; diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index 19ec4f62..9dd5b6de 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -467,6 +467,7 @@ void WINAPI CLEO_RegisterCallback(eCallbackId id, void* func); // script utils +BOOL WINAPI CLEO_IsScriptRunning(const CRunningScript* thread); // check if script is active void WINAPI CLEO_GetScriptInfoStr(CRunningScript* thread, bool currLineInfo, char* buf, DWORD bufSize); // short text for displaying in error\log messages void WINAPI CLEO_GetScriptParamInfoStr(int idexOffset, char* buf, DWORD bufSize); // short text with current+offset opcode parameter info (index and name if available) eCLEO_Version WINAPI CLEO_GetScriptVersion(const CRunningScript* thread); // compatibility mode @@ -477,6 +478,7 @@ void WINAPI CLEO_SetScriptWorkDir(CRunningScript* thread, const char* path); void WINAPI CLEO_SetThreadCondResult(CRunningScript* thread, BOOL result); void WINAPI CLEO_ThreadJumpAtLabelPtr(CRunningScript* thread, int labelPtr); +void WINAPI CLEO_TerminateScript(CRunningScript* thread); int WINAPI CLEO_GetOperandType(const CRunningScript* thread); // peek parameter data type. Returns int for legacy reason, should be eDataType. 
DWORD WINAPI CLEO_GetVarArgCount(CRunningScript* thread); // peek remaining var-args count diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index c652e334..067292e3 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -42,7 +42,7 @@ namespace CLEO OpcodeResult __stdcall opcode_0AB6(CRunningScript* thread); // get_target_blip_coords OpcodeResult __stdcall opcode_0AB7(CRunningScript* thread); // get_car_number_of_gears OpcodeResult __stdcall opcode_0AB8(CRunningScript* thread); // get_car_current_gear - OpcodeResult __stdcall opcode_0ABA(CRunningScript* thread); // terminate_all_custom_scripts_with_this_name + OpcodeResult __stdcall opcode_0ABD(CRunningScript* thread); // is_car_siren_on OpcodeResult __stdcall opcode_0ABE(CRunningScript* thread); // is_car_engine_on OpcodeResult __stdcall opcode_0ABF(CRunningScript* thread); // cleo_set_car_engine_on @@ -281,7 +281,6 @@ namespace CLEO CLEO_RegisterOpcode(0x0AB6, opcode_0AB6); CLEO_RegisterOpcode(0x0AB7, opcode_0AB7); CLEO_RegisterOpcode(0x0AB8, opcode_0AB8); - CLEO_RegisterOpcode(0x0ABA, opcode_0ABA); CLEO_RegisterOpcode(0x0ABD, opcode_0ABD); CLEO_RegisterOpcode(0x0ABE, opcode_0ABE); CLEO_RegisterOpcode(0x0ABF, opcode_0ABF); @@ -1283,19 +1282,6 @@ namespace CLEO return OR_CONTINUE; } - //0ABA=1,end_custom_thread_named %1d% - OpcodeResult __stdcall opcode_0ABA(CRunningScript *thread) - { - OPCODE_READ_PARAM_STRING(threadName); - - auto deleted_thread = (CCustomScript*)GetInstance().ScriptEngine.FindScriptNamed(threadName, false, true, 0); - if (deleted_thread) - { - GetInstance().ScriptEngine.RemoveCustomScript(deleted_thread); - } - return deleted_thread == thread ? 
OR_INTERRUPT : OR_CONTINUE; - } - //0ABD=1, vehicle %1d% siren_on OpcodeResult __stdcall opcode_0ABD(CRunningScript *thread) { @@ -1834,6 +1820,11 @@ extern "C" ThreadJump(thread, labelPtr); } + void WINAPI CLEO_TerminateScript(CLEO::CRunningScript* thread) + { + GetInstance().ScriptEngine.RemoveScript(thread); + } + int WINAPI CLEO_GetOperandType(const CLEO::CRunningScript* thread) { return (int)thread->PeekDataType(); @@ -1935,6 +1926,11 @@ extern "C" reinterpret_cast(thread)->SetDebugMode(enabled); } + BOOL WINAPI CLEO_IsScriptRunning(const CLEO::CRunningScript* thread) + { + return GetInstance().ScriptEngine.IsActiveScriptPtr(thread); + } + void WINAPI CLEO_GetScriptInfoStr(CLEO::CRunningScript* thread, bool currLineInfo, char* buf, DWORD bufSize) { if (thread == nullptr || buf == nullptr || bufSize < 2) diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 91e6be96..b2869269 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -311,24 +311,19 @@ namespace CLEO CRunningScript **inactiveThreadQueue, **activeThreadQueue; - extern "C" void __stdcall opcode_004E(CCustomScript *pScript) + extern "C" void __stdcall opcode_004E(CCustomScript *pScript) // terminate_this_script { if (pScript->IsCustom()) { - if (!pScript->IsMission()) + if (pScript->IsMission()) + *MissionLoaded = false; + else { TRACE("Incorrect usage of opcode [004E] in script %s.", pScript->GetName().c_str()); } - else *MissionLoaded = false; - GetInstance().ScriptEngine.RemoveCustomScript(pScript); - } - else - { - if (pScript->IsMission()) *MissionLoaded = false; - RemoveScriptFromQueue(pScript, activeThreadQueue); - AddScriptToQueue(pScript, inactiveThreadQueue); - StopScript(pScript); } + + GetInstance().ScriptEngine.RemoveScript(pScript); } extern "C" void __declspec(naked) opcode_004E_hook(void) @@ -1287,6 +1282,23 @@ namespace CLEO return nullptr; } + bool CScriptEngine::IsActiveScriptPtr(const CRunningScript* ptr) const + { + for (auto script = 
*activeThreadQueue; script != nullptr; script = script->GetNext()) + { + if (script == ptr) + return ptr->IsActive(); + } + + for (const auto script : CustomScripts) + { + if (script == ptr) + return ptr->IsActive(); + } + + return false; + } + bool CScriptEngine::IsValidScriptPtr(const CRunningScript* ptr) const { for (auto script = *activeThreadQueue; script != nullptr; script = script->GetNext()) @@ -1295,12 +1307,24 @@ namespace CLEO return true; } + for (auto script = *inactiveThreadQueue; script != nullptr; script = script->GetNext()) + { + if (script == ptr) + return true; + } + for (const auto script : CustomScripts) { if (script == ptr) return true; } + for (const auto script : ScriptsWaitingForDelete) + { + if (script == ptr) + return true; + } + return false; } @@ -1327,6 +1351,21 @@ namespace CLEO } } + void CScriptEngine::RemoveScript(CRunningScript* thread) + { + if (!thread->IsCustom()) + { + if (thread->IsMission()) *MissionLoaded = false; + RemoveScriptFromQueue(thread, activeThreadQueue); + AddScriptToQueue(thread, inactiveThreadQueue); + StopScript(thread); + } + else + { + RemoveCustomScript((CCustomScript*)thread); + } + } + void CScriptEngine::RemoveCustomScript(CCustomScript *cs) { // run registered callbacks diff --git a/source/CScriptEngine.h b/source/CScriptEngine.h index 72283554..4b21db89 100644 --- a/source/CScriptEngine.h +++ b/source/CScriptEngine.h @@ -132,8 +132,10 @@ namespace CLEO CRunningScript* FindScriptNamed(const char* threadName, bool standardScripts, bool customScripts, size_t resultIndex = 0); // can be called multiple times to find more scripts named threadName. resultIndex should be incremented until the method returns nullptr CRunningScript* FindScriptByFilename(const char* path, size_t resultIndex = 0); // if path is not absolute it will be resolved with cleo directory as root - bool IsValidScriptPtr(const CRunningScript*) const; // leads to active script? 
(regular or custom) + bool IsActiveScriptPtr(const CRunningScript*) const; // leads to active script? (regular or custom) + bool IsValidScriptPtr(const CRunningScript*) const; // leads to any script? (regular or custom) void AddCustomScript(CCustomScript*); + void RemoveScript(CRunningScript*); // native or custom void RemoveCustomScript(CCustomScript*); void RemoveAllCustomScripts(); void UnregisterAllScripts(); diff --git a/source/cleo.def b/source/cleo.def index 93517774..10fc778d 100644 --- a/source/cleo.def +++ b/source/cleo.def @@ -52,3 +52,5 @@ EXPORTS _CLEO_GetScriptWorkDir@4 @49 _CLEO_SetScriptWorkDir@8 @50 _CLEO_RegisterCommand@8 @51 + _CLEO_IsScriptRunning@4 @52 + _CLEO_TerminateScript@4 @53 From 0c368da42007bf3c37e10d7aa2ca933892c7ce8f Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sun, 23 Jun 2024 02:06:18 +0200 Subject: [PATCH 168/216] Fixed script path info of main.scm (#159) Documentation updates in CLEO_Utils.h --- cleo_sdk/CLEO_Utils.h | 18 ++++++++++++++---- source/CScriptEngine.cpp | 2 +- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/cleo_sdk/CLEO_Utils.h b/cleo_sdk/CLEO_Utils.h index bf88ce52..82531b93 100644 --- a/cleo_sdk/CLEO_Utils.h +++ b/cleo_sdk/CLEO_Utils.h @@ -1,5 +1,15 @@ -// some utilities usefull when creating CLEO plugins -// requires adding "CPools.cpp" from GTA Plugin SDK to the project +// Some optional utilities usefull when creating CLEO plugins +// +// Add following lines to "Additional Include Directories" in project config: +// $(PLUGIN_SDK_DIR)\plugin_sa\ +// $(PLUGIN_SDK_DIR)\shared\game\ +// $(PLUGIN_SDK_DIR)\plugin_sa\game_sa\ +// +// Add following lines to "Preprocesor Definitions": +// GTASA +// TARGET_NAME=R"($(TargetName))" +// +// Depending on used functions, may require adding "CPools.cpp" from GTA Plugin SDK to the project files #pragma once #include "CLEO.h" @@ -17,7 +27,7 @@ namespace CLEO SHOW_ERROR(a,...) // message box, log to file Macros to use inside opcode handler functions. 
Performs types validation, printing warnings and suspending script on critical errors. - Please mind those might expand into multiple lines, so should, for example, not be used as body of 'if' statements without brackets! + Please mind those expand into multiple lines, so CAN NOT be used in places where single code line is expected! (like 'if' condition body without brackets) OPCODE_CONDITION_RESULT(value) // set result OPCODE_SKIP_PARAMS(count) // ignore X params @@ -196,7 +206,7 @@ namespace CLEO ShowWindow(NULL, SW_MINIMIZE); } - MessageBox(NULL, msg, "CLEO error", MB_SYSTEMMODAL | MB_TOPMOST | MB_ICONERROR | MB_OK); + MessageBoxA(NULL, msg, "CLEO error", MB_SYSTEMMODAL | MB_TOPMOST | MB_ICONERROR | MB_OK); if (fullscreen) { diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index b2869269..83cad2f4 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -962,7 +962,7 @@ namespace CLEO if (CGame::bMissionPackGame == 0) // regular main game { - MainScriptFileDir = FS::path(Filepath_Cleo).append("data\\script").string(); + MainScriptFileDir = FS::path(Filepath_Root).append("data\\script").string(); MainScriptFileName = "main.scm"; } else // mission pack From 65eca0c650745614287a021a555d00d600787dfc Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sun, 23 Jun 2024 09:39:57 +0200 Subject: [PATCH 169/216] Enabled support of network sources in audio stream opcodes. (#158) Enabled support of network sources in audio stream opcodes. 
Added network sources support as option in settings --- cleo_plugins/Audio/Audio.cpp | 11 ++++++++--- cleo_plugins/Audio/C3DAudioStream.cpp | 8 +++++++- cleo_plugins/Audio/CAudioStream.cpp | 8 +++++++- cleo_plugins/Audio/CSoundSystem.cpp | 8 ++++++++ cleo_plugins/Audio/CSoundSystem.h | 3 +++ cleo_plugins/Audio/SA.Audio.ini | 3 +++ 6 files changed, 36 insertions(+), 5 deletions(-) diff --git a/cleo_plugins/Audio/Audio.cpp b/cleo_plugins/Audio/Audio.cpp index 0c8060ec..873b18ec 100644 --- a/cleo_plugins/Audio/Audio.cpp +++ b/cleo_plugins/Audio/Audio.cpp @@ -105,7 +105,10 @@ class Audio //0AAC=2, %2d% = load_audiostream %1d% // IF and SET static OpcodeResult __stdcall opcode_0AAC(CScriptThread* thread) { - OPCODE_READ_PARAM_FILEPATH(path); + OPCODE_READ_PARAM_STRING_LEN(path, 511); + + if (!isNetworkSource(path)) + CLEO_ResolvePath(thread, _buff_path, sizeof(_buff_path)); auto ptr = soundSystem.CreateStream(path); @@ -207,7 +210,10 @@ class Audio //0AC1=2,%2d% = load_audiostream_with_3d_support %1d% //IF and SET static OpcodeResult __stdcall opcode_0AC1(CScriptThread* thread) { - OPCODE_READ_PARAM_FILEPATH(path); + OPCODE_READ_PARAM_STRING_LEN(path, 511); + + if (!isNetworkSource(path)) + CLEO_ResolvePath(thread, _buff_path, sizeof(_buff_path)); auto ptr = soundSystem.CreateStream(path, true); @@ -414,4 +420,3 @@ class Audio } audioInstance; CSoundSystem Audio::soundSystem; - diff --git a/cleo_plugins/Audio/C3DAudioStream.cpp b/cleo_plugins/Audio/C3DAudioStream.cpp index fe7f637a..4b774808 100644 --- a/cleo_plugins/Audio/C3DAudioStream.cpp +++ b/cleo_plugins/Audio/C3DAudioStream.cpp @@ -6,13 +6,19 @@ using namespace CLEO; C3DAudioStream::C3DAudioStream(const char* filepath) : CAudioStream() { + if (isNetworkSource(filepath) && !CSoundSystem::allowNetworkSources) + { + TRACE("Loading of 3d-audiostream '%s' failed. 
Support of network sources was disabled in SA.Audio.ini", filepath); + return; + } + unsigned flags = BASS_SAMPLE_3D | BASS_SAMPLE_MONO | BASS_SAMPLE_SOFTWARE; if (CSoundSystem::useFloatAudio) flags |= BASS_SAMPLE_FLOAT; if (!(streamInternal = BASS_StreamCreateFile(FALSE, filepath, 0, 0, flags)) && !(streamInternal = BASS_StreamCreateURL(filepath, 0, flags, nullptr, nullptr))) { - LOG_WARNING(0, "Loading 3d-audiostream %s failed. Error code: %d", filepath, BASS_ErrorGetCode()); + LOG_WARNING(0, "Loading of 3d-audiostream '%s' failed. Error code: %d", filepath, BASS_ErrorGetCode()); return; } diff --git a/cleo_plugins/Audio/CAudioStream.cpp b/cleo_plugins/Audio/CAudioStream.cpp index d41097e4..b539c2c1 100644 --- a/cleo_plugins/Audio/CAudioStream.cpp +++ b/cleo_plugins/Audio/CAudioStream.cpp @@ -6,13 +6,19 @@ using namespace CLEO; CAudioStream::CAudioStream(const char* filepath) { + if (isNetworkSource(filepath) && !CSoundSystem::allowNetworkSources) + { + TRACE("Loading of audiostream '%s' failed. Support of network sources was disabled in SA.Audio.ini", filepath); + return; + } + unsigned flags = BASS_SAMPLE_SOFTWARE; if (CSoundSystem::useFloatAudio) flags |= BASS_SAMPLE_FLOAT; if (!(streamInternal = BASS_StreamCreateFile(FALSE, filepath, 0, 0, flags)) && !(streamInternal = BASS_StreamCreateURL(filepath, 0, flags, 0, nullptr))) { - LOG_WARNING(0, "Loading audiostream %s failed. Error code: %d", filepath, BASS_ErrorGetCode()); + LOG_WARNING(0, "Loading of audiostream '%s' failed. 
Error code: %d", filepath, BASS_ErrorGetCode()); return; } diff --git a/cleo_plugins/Audio/CSoundSystem.cpp b/cleo_plugins/Audio/CSoundSystem.cpp index 326045ce..3166f55a 100644 --- a/cleo_plugins/Audio/CSoundSystem.cpp +++ b/cleo_plugins/Audio/CSoundSystem.cpp @@ -8,6 +8,7 @@ namespace CLEO { bool CSoundSystem::useFloatAudio = false; + bool CSoundSystem::allowNetworkSources = true; BASS_3DVECTOR CSoundSystem::pos(0.0, 0.0, 0.0); BASS_3DVECTOR CSoundSystem::vel(0.0, 0.0, 0.0); BASS_3DVECTOR CSoundSystem::front(0.0, -1.0, 0.0); @@ -29,6 +30,12 @@ namespace CLEO } } + bool isNetworkSource(const char* path) + { + return _strnicmp("http:", path, 5) == 0 || + _strnicmp("https:", path, 6) == 0; + } + CSoundSystem::~CSoundSystem() { TRACE("Finalizing SoundSystem..."); @@ -49,6 +56,7 @@ namespace CLEO auto config = GetConfigFilename(); defaultStreamType = (eStreamType)GetPrivateProfileInt("General", "DefaultStreamType", 0, config.c_str()); + allowNetworkSources = GetPrivateProfileInt("General", "AllowNetworkSources", 1, config.c_str()) != 0; int default_device, total_devices, enabled_devices; EnumerateBassDevices(total_devices, enabled_devices, default_device); diff --git a/cleo_plugins/Audio/CSoundSystem.h b/cleo_plugins/Audio/CSoundSystem.h index 9322f2c0..28c93682 100644 --- a/cleo_plugins/Audio/CSoundSystem.h +++ b/cleo_plugins/Audio/CSoundSystem.h @@ -25,6 +25,7 @@ namespace CLEO bool paused = false; static bool useFloatAudio; + static bool CSoundSystem::allowNetworkSources; static BASS_3DVECTOR pos; static BASS_3DVECTOR vel; @@ -52,4 +53,6 @@ namespace CLEO void Resume(); void Process(); }; + + bool isNetworkSource(const char* path); } diff --git a/cleo_plugins/Audio/SA.Audio.ini b/cleo_plugins/Audio/SA.Audio.ini index a9925975..9103f5b5 100644 --- a/cleo_plugins/Audio/SA.Audio.ini +++ b/cleo_plugins/Audio/SA.Audio.ini @@ -2,6 +2,9 @@ ; Manually select audio device. Visit `.cleo.log` file to check list of available options. 
-1 for automatic AudioDevice=-1 +; Allow playing streams from http(s) locations +AllowNetworkSources=1 + ; Which game's volume settings CLEO sounds should use by default: 0 - None, 1 - SFX, 2 - Music DefaultStreamType=1 From e6dc8fccb3fa3d11a5e7ea05bf0781a1a66616ac Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Mon, 24 Jun 2024 18:49:20 +0200 Subject: [PATCH 170/216] Cleo scripts listing with modloader (#160) --- source/CScriptEngine.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 83cad2f4..42a356b9 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -1008,11 +1008,21 @@ namespace CLEO } }; - auto searchPattern = Filepath_Cleo + "\\*.*"; + auto searchPattern = Filepath_Cleo + "\\*" + cs_ext; auto list = CLEO_ListDirectory(nullptr, searchPattern.c_str(), false, true); processFileList(list); CLEO_StringListFree(list); + searchPattern = Filepath_Cleo + "\\*" + cs3_ext; + list = CLEO_ListDirectory(nullptr, searchPattern.c_str(), false, true); + processFileList(list); + CLEO_StringListFree(list); + + searchPattern = Filepath_Cleo + "\\*" + cs4_ext; + list = CLEO_ListDirectory(nullptr, searchPattern.c_str(), false, true); + processFileList(list); + CLEO_StringListFree(list); + if (!found.empty()) { TRACE("Starting CLEO scripts"); From 5c5d2a4282e8f2cf6fe1fb714044c22c54eddca2 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Mon, 24 Jun 2024 20:51:35 +0200 Subject: [PATCH 171/216] Fixes and refactor (#161) Refactors Virtual path `userfiles:` shortened to `user:` Fixed full screen detection when displaying message boxes. 
--- CHANGELOG.md | 3 ++- cleo_sdk/CLEO.h | 2 +- cleo_sdk/CLEO_Utils.h | 12 ++++++------ source/CCustomOpcodeSystem.h | 1 + source/CModuleSystem.h | 4 ++++ source/CPluginSystem.h | 1 + source/CScriptEngine.cpp | 12 +++++++++--- source/CScriptEngine.h | 8 +++++++- source/CleoBase.h | 2 +- 9 files changed, 32 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 02bdc1e5..1b27249f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -77,10 +77,11 @@ - SCM functions **(0AB1)** now keep their own GOSUB's call stack - fixed bug in **0AD4 ([scan_string](https://library.sannybuilder.com/#/sa/text/2604))** causing data overruns when reading strings longer than target variable - fixed result register not being cleared before function call in opcodes **0AA7** and **0AA8** + - fixed **0ABA ([terminate_all_custom_scripts_with_this_name](https://library.sannybuilder.com/#/sa/CLEO/0ABA))** terminating only first found script - changes in file operations - file paths can now use 'virtual absolute paths'. Use prefix in file path strings to access predefined locations: - `root:\` for _game root_ directory - - `userfiles:\` for _game save files_ directory + - `user:\` for _game save files_ directory - `.\` for _this script file_ directory - `cleo:\` for _CLEO_ directory - `modules:\` for _CLEO\cleo_modules_ directory diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index 9dd5b6de..918422f8 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -279,7 +279,7 @@ static eLogicalOperation& operator--(eLogicalOperation& o) // CLEO virtual path prefixes. 
Expandable with CLEO_ResolvePath const char DIR_GAME[] = "root:"; // game root directory -const char DIR_USER[] = "userfiles:"; // game save directory +const char DIR_USER[] = "user:"; // game save directory const char DIR_SCRIPT[] = "."; // current script directory const char DIR_CLEO[] = "cleo:"; // game\cleo directory const char DIR_MODULES[] = "modules:"; // game\cleo\modules directory diff --git a/cleo_sdk/CLEO_Utils.h b/cleo_sdk/CLEO_Utils.h index 82531b93..3f50a9e4 100644 --- a/cleo_sdk/CLEO_Utils.h +++ b/cleo_sdk/CLEO_Utils.h @@ -196,22 +196,22 @@ namespace CLEO auto msg = TraceVArg(CLEO::eLogLevel::Error, format, args); va_end(args); - auto mainWnd = (HWND*)0x001C9055C; // PluginSDK: RsGlobal.ps->window + auto mainWnd = (HWND*)0x00C8CF88; // PluginSDK: RsGlobal.ps->window auto style = GetWindowLong(*mainWnd, GWL_STYLE); - bool fullscreen = (style & (WS_BORDER | WS_CAPTION)) != 0; + bool fullscreen = (style & (WS_BORDER | WS_CAPTION)) == 0; if (fullscreen) { - PostMessage(NULL, WM_SYSCOMMAND, SC_MINIMIZE, 0); - ShowWindow(NULL, SW_MINIMIZE); + PostMessage(*mainWnd, WM_SYSCOMMAND, SC_MINIMIZE, 0); + ShowWindow(*mainWnd, SW_MINIMIZE); } MessageBoxA(NULL, msg, "CLEO error", MB_SYSTEMMODAL | MB_TOPMOST | MB_ICONERROR | MB_OK); if (fullscreen) { - PostMessage(NULL, WM_SYSCOMMAND, SC_RESTORE, 0); - ShowWindow(NULL, SW_RESTORE); + PostMessage(*mainWnd, WM_SYSCOMMAND, SC_RESTORE, 0); + ShowWindow(*mainWnd, SW_RESTORE); } } diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index 57943dc5..9059bdc2 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -33,6 +33,7 @@ namespace CLEO void FinalizeScriptObjects(); CCustomOpcodeSystem() = default; + CCustomOpcodeSystem(const CCustomOpcodeSystem&) = delete; // no copying virtual void Inject(CCodeInjector& inj); void Init(); ~CCustomOpcodeSystem() diff --git a/source/CModuleSystem.h b/source/CModuleSystem.h index f6cd2437..b78aee14 100644 --- a/source/CModuleSystem.h +++ 
b/source/CModuleSystem.h @@ -20,6 +20,9 @@ namespace CLEO class CModuleSystem { public: + CModuleSystem() = default; + CModuleSystem(const CModuleSystem&) = delete; // no copying + void Clear(); // registers module reference. Needs to be released with ReleaseModuleRef @@ -66,6 +69,7 @@ namespace CLEO public: CModule(); + CModule(const CModule&) = delete; // no copying ~CModule(); void Clear(); diff --git a/source/CPluginSystem.h b/source/CPluginSystem.h index 7c427d5a..a1b67287 100644 --- a/source/CPluginSystem.h +++ b/source/CPluginSystem.h @@ -16,6 +16,7 @@ namespace CLEO public: CPluginSystem() = default; + CPluginSystem(const CPluginSystem&) = delete; // no copying ~CPluginSystem(); void LoadPlugins(); diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 42a356b9..53e8d602 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -658,6 +658,14 @@ namespace CLEO scriptFileName = filename; } + std::string CCustomScript::GetScriptFileFullPath() const + { + std::string path = GetScriptFileDir(); + path += '\\'; + path += GetScriptFileName(); + return path; + } + const char* CCustomScript::GetWorkDir() const { if (!bIsCustom) @@ -1245,9 +1253,7 @@ namespace CLEO if (script == nullptr) return false; auto cs = (CCustomScript*)script; - std::string scriptPath = cs->GetScriptFileDir(); - scriptPath += '\\'; - scriptPath += cs->GetScriptFileName(); + std::string scriptPath = cs->GetScriptFileFullPath(); if (scriptPath.length() < pathLen) return false; diff --git a/source/CScriptEngine.h b/source/CScriptEngine.h index 4b21db89..636bf925 100644 --- a/source/CScriptEngine.h +++ b/source/CScriptEngine.h @@ -55,7 +55,9 @@ namespace CLEO inline DWORD& GetLastSearchPed() { return LastSearchPed; } inline DWORD& GetLastSearchVehicle() { return LastSearchCar; } inline DWORD& GetLastSearchObject() { return LastSearchObj; } - CCustomScript(const char *szFileName, bool bIsMiss = false, CRunningScript *parent = nullptr, int label = 0); + + 
CCustomScript(const char *szFileName, bool bIsMiss = false, CRunningScript *parent = nullptr, int label = 0); + CCustomScript(const CCustomScript&) = delete; // no copying ~CCustomScript(); void Process(); @@ -84,6 +86,9 @@ namespace CLEO const char* GetScriptFileName() const; void SetScriptFileName(const char* filename); + // absolute path to the script file + std::string GetScriptFileFullPath() const; + // current working directory of this script. Can be changed ith 0A99 const char* GetWorkDir() const; void SetWorkDir(const char* directory); @@ -117,6 +122,7 @@ namespace CLEO static SCRIPT_VAR CleoVariables[0x400]; CScriptEngine() = default; + CScriptEngine(const CScriptEngine&) = delete; // no copying ~CScriptEngine(); virtual void Inject(CCodeInjector&); diff --git a/source/CleoBase.h b/source/CleoBase.h index ef46daac..06cb7f25 100644 --- a/source/CleoBase.h +++ b/source/CleoBase.h @@ -30,7 +30,7 @@ namespace CLEO CCustomOpcodeSystem OpcodeSystem; CModuleSystem ModuleSystem; CPluginSystem PluginSystem; - OpcodeInfoDatabase OpcodeInfoDb; + OpcodeInfoDatabase OpcodeInfoDb; int saveSlot = -1; // -1 if not loaded from save From f4ee380c68ab78486c06c2abfe775b4e7e324a58 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Mon, 24 Jun 2024 20:52:20 +0200 Subject: [PATCH 172/216] New get_file_write_time opcode (#162) New opcode get_file_write_time --- .../FileSystemOperations.cpp | 40 +++++++ .../cleo_tests/FilesystemOperations/2305.txt | 110 ++++++++++++++++++ tests/cleo_tests/cleo_tester.inc | 13 +++ 3 files changed, 163 insertions(+) create mode 100644 tests/cleo_tests/FilesystemOperations/2305.txt diff --git a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp index 825465e2..93ea5f68 100644 --- a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp @@ -76,6 +76,7 @@ class FileSystemOperations CLEO_RegisterOpcode(0x2302, 
opcode_2302); // write_block_to_file CLEO_RegisterOpcode(0x2303, opcode_2303); // resolve_filepath CLEO_RegisterOpcode(0x2304, opcode_2304); // get_script_filename + CLEO_RegisterOpcode(0x2305, opcode_2305); // get_file_write_time // register event callbacks CLEO_RegisterCallback(eCallbackId::ScriptsFinalize, OnFinalizeScriptObjects); @@ -760,6 +761,45 @@ class FileSystemOperations OPCODE_CONDITION_RESULT(true); return OR_CONTINUE; } + + //2305=8, get_file_write_time %1s% year %2d% month %3d% day %3d% hour %4d% minute %5d% second %6d% milisecond %7d% // IF and SET + static OpcodeResult __stdcall opcode_2305(CRunningScript* thread) + { + OPCODE_READ_PARAM_FILEPATH(path); + + HANDLE file = CreateFile(path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, 0, NULL); + if (file == INVALID_HANDLE_VALUE) + { + OPCODE_SKIP_PARAMS(7); + OPCODE_CONDITION_RESULT(false); + return OR_CONTINUE; + } + + FILETIME writeTime; + if (!GetFileTime(file, nullptr, nullptr, &writeTime)) + { + CloseHandle(file); + OPCODE_SKIP_PARAMS(7); + OPCODE_CONDITION_RESULT(false); + return OR_CONTINUE; + } + CloseHandle(file); + + // convert to local time + SYSTEMTIME timeUTC, timeLocal; + FileTimeToSystemTime(&writeTime, &timeUTC); + SystemTimeToTzSpecificLocalTime(NULL, &timeUTC, &timeLocal); + + OPCODE_WRITE_PARAM_INT(timeLocal.wYear); + OPCODE_WRITE_PARAM_INT(timeLocal.wMonth); + OPCODE_WRITE_PARAM_INT(timeLocal.wDay); + OPCODE_WRITE_PARAM_INT(timeLocal.wHour); + OPCODE_WRITE_PARAM_INT(timeLocal.wMinute); + OPCODE_WRITE_PARAM_INT(timeLocal.wSecond); + OPCODE_WRITE_PARAM_INT(timeLocal.wMilliseconds); + OPCODE_CONDITION_RESULT(true); + return OR_CONTINUE; + } } fileSystemOperations; std::set FileSystemOperations::m_hFiles; diff --git a/tests/cleo_tests/FilesystemOperations/2305.txt b/tests/cleo_tests/FilesystemOperations/2305.txt new file mode 100644 index 00000000..f35e722f --- /dev/null +++ b/tests/cleo_tests/FilesystemOperations/2305.txt @@ -0,0 +1,110 @@ +{$CLEO .s} +{$INCLUDE_ONCE 
../cleo_tester.inc} + +script_name "2305" // get_file_write_time +test("2305 (get_file_write_time)", tests) +terminate_this_custom_script + + +function tests + const Test_File_Path = ".\test_file.ini" + + before_each(@prepare) + after_each(@finalize) + + it("should fail", test1) + it("should return valid data", test2) + it("should work with already opened file", test3) + it("should return correct time", test4) + return + + :prepare + 1@ = 0x11111111 + 2@ = 0x22222222 + 3@ = 0x33333333 + 4@ = 0x44444444 + 5@ = 0x55555555 + 6@ = 0x66666666 + 7@ = 0x77777777 + write_int_to_ini_file {value} 1234 {path} Test_File_Path {section} "general" {key} "test" + return + + :finalize + delete_file {fileName} Test_File_Path + return + + function test1 + 1@, 2@, 3@, 4@, 5@, 6@, 7@ = get_file_write_time {path} ".\not_existing.ini" + assert_result_false() + assert_eq(1@, 0x11111111) + assert_eq(2@, 0x22222222) + assert_eq(3@, 0x33333333) + assert_eq(4@, 0x44444444) + assert_eq(5@, 0x55555555) + assert_eq(6@, 0x66666666) + assert_eq(7@, 0x77777777) + end + + function test2 + 1@, 2@, 3@, 4@, 5@, 6@, 7@ = get_file_write_time {path} Test_File_Path + assert_result_true() + assert_range(1@, 2000, 2100) // year + assert_range(2@, 1, 12) // month + assert_range(3@, 1, 31) // day + assert_range(4@, 0, 23) // hour + assert_range(5@, 0, 59) // minute + assert_range(6@, 0, 59) // second + assert_range(7@, 0, 999) // milisedond + trace "~g~~h~~h~Read time: %04d-%02d-%02d %02d:%02d:%02d.%03d" 1@ 2@ 3@ 4@ 5@ 6@ 7@ + end + + function test3 + 1@, 2@, 3@, 4@, 5@, 6@, 7@ = get_file_write_time {path} "root:\cleo.asi" + assert_result_true() + assert_range(1@, 2000, 2100) // year + assert_range(2@, 1, 12) // month + assert_range(3@, 1, 31) // day + assert_range(4@, 0, 23) // hour + assert_range(5@, 0, 59) // minute + assert_range(6@, 0, 59) // second + assert_range(7@, 0, 999) // milisecond + end + + function test4 + var 0@ : Integer + var 1@ : Integer + var 2@ : Integer + var 3@ : Integer + var 4@ : 
Integer + var 5@ : Integer + var 6@ : Integer + var 7@ : Integer + var 8@ : Integer + + 0@, 0@, 0@, 1@, 2@, 3@, 4@ = get_file_write_time {path} Test_File_Path + assert_result_true() + + // calculate small time stamp + 2@ *= 60000 // minutes to ms + 3@ *= 1000 // seconds to ms + 4@ += 2@ + 4@ += 3@ + + wait 500 + write_int_to_ini_file {value} 1234 {path} Test_File_Path {section} "general" {key} "test4" + + 0@, 0@, 0@, 5@, 6@, 7@, 8@ = get_file_write_time {path} Test_File_Path + assert_result_true() + assert_eq(5@, 1@) // hour did not changed meanwhile + + // calculate small time stamp + 6@ *= 60000 // minutes to ms + 7@ *= 1000 // seconds to ms + 8@ += 6@ + 8@ += 7@ + + // timestamps delta + 8@ -= 4@ + assert_range(8@, 450, 550) // expected 500 plus some margin + end +end diff --git a/tests/cleo_tests/cleo_tester.inc b/tests/cleo_tests/cleo_tester.inc index 7776815a..5dbe36ea 100644 --- a/tests/cleo_tests/cleo_tester.inc +++ b/tests/cleo_tests/cleo_tester.inc @@ -198,6 +198,19 @@ function assert_neq(actual: int, expected: int) end end +/// checks if int value is within specified range, otherwise stops the test execution +function assert_range(actual: int, expectedMin: int, expectedMax: int) + _cleo_tester_increment_assert() + if + actual < expectedMin + then + _cleo_tester_fail() + trace "%08X to %08X expected~n~%08X occured" expectedMin expectedMax actual + breakpoint + terminate_this_custom_script + end +end + /// checks if two float values are equal, otherwise stops the test execution function assert_eqf(actual: float, expected: float) _cleo_tester_increment_assert() From b2e44bcd7a713ea073e29819f5def0cc3a7ff50e Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sun, 21 Jul 2024 03:36:11 +0200 Subject: [PATCH 173/216] Updated validation of file handle input parameter. (#163) Updated validation of file handle input parameter. Allowed closing invalid handles in legacy mode. 
--- .../FileSystemOperations.cpp | 41 +++++++++++-------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp index 93ea5f68..fbdec64f 100644 --- a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp @@ -8,10 +8,8 @@ using namespace CLEO; using namespace plugin; -#define READ_FILE_HANDLE_PARAM() CLEO_GetIntOpcodeParam(thread); \ - if((size_t)handle <= MinValidAddress) \ - { auto info = ScriptInfoStr(thread); SHOW_ERROR("Invalid '0x%X' file handle param in script %s \nScript suspended.", handle, info.c_str()); return thread->Suspend(); } \ - else if(m_hFiles.find(handle) == m_hFiles.end()) { auto info = ScriptInfoStr(thread); SHOW_ERROR("Invalid or already closed '0x%X' file handle param in script %s \nScript suspended.", handle, info.c_str()); return thread->Suspend(); } +#define OPCODE_READ_PARAM_FILE_HANDLE(handle) auto handle = (DWORD)OPCODE_READ_PARAM_PTR(); \ + if(m_hFiles.find(handle) == m_hFiles.end()) { auto info = ScriptInfoStr(thread); SHOW_ERROR("Invalid or already closed '0x%X' file handle param in script %s \nScript suspended.", handle, info.c_str()); return thread->Suspend(); } class FileSystemOperations { @@ -158,20 +156,29 @@ class FileSystemOperations //0A9B=1,closefile %1d% static OpcodeResult WINAPI opcode_0A9B(CRunningScript* thread) { - DWORD handle = READ_FILE_HANDLE_PARAM(); + auto handle = OPCODE_READ_PARAM_INT(); if (m_hFiles.find(handle) != m_hFiles.end()) { File::close(handle); m_hFiles.erase(handle); } + else + { + if (!IsLegacyScript(thread)) + { + SHOW_ERROR("Invalid or already closed '0x%X' file handle param in script %s \nScript suspended.\n\nTo ignore this error, change the file extension from .cs to .cs4 and restart the game.", handle, ScriptInfoStr(thread).c_str()); + return thread->Suspend(); + } + } + return OR_CONTINUE; } 
//0A9C=2,get_file_size %1d% store_to %2d% static OpcodeResult WINAPI opcode_0A9C(CRunningScript* thread) { - DWORD handle = READ_FILE_HANDLE_PARAM(); + OPCODE_READ_PARAM_FILE_HANDLE(handle); auto size = File::getSize(handle); @@ -182,7 +189,7 @@ class FileSystemOperations //0A9D=3,read_from_file %1d% size %2d% store_to %3d% static OpcodeResult WINAPI opcode_0A9D(CRunningScript* thread) { - auto handle = READ_FILE_HANDLE_PARAM(); + OPCODE_READ_PARAM_FILE_HANDLE(handle); auto size = OPCODE_READ_PARAM_INT(); auto destination = OPCODE_READ_PARAM_OUTPUT_VAR_ANY32(); @@ -200,7 +207,7 @@ class FileSystemOperations //0A9E=3,write_to_file %1d% size %2d% from %3d% static OpcodeResult WINAPI opcode_0A9E(CRunningScript* thread) { - auto handle = READ_FILE_HANDLE_PARAM(); + OPCODE_READ_PARAM_FILE_HANDLE(handle); auto size = OPCODE_READ_PARAM_INT(); if (size < 0) @@ -266,7 +273,7 @@ class FileSystemOperations //0AD5=3, file_seek %1d% offset %2d% origin %3d% //IF and SET static OpcodeResult WINAPI opcode_0AD5(CRunningScript* thread) { - auto handle = READ_FILE_HANDLE_PARAM(); + OPCODE_READ_PARAM_FILE_HANDLE(handle); auto offset = OPCODE_READ_PARAM_INT(); auto origin = OPCODE_READ_PARAM_UINT(); @@ -279,7 +286,7 @@ class FileSystemOperations //0AD6=1, is_end_of_file_reached %1d% static OpcodeResult WINAPI opcode_0AD6(CRunningScript* thread) { - auto handle = READ_FILE_HANDLE_PARAM(); + OPCODE_READ_PARAM_FILE_HANDLE(handle); bool end = !File::isOk(handle) || File::isEndOfFile(handle); @@ -290,7 +297,7 @@ class FileSystemOperations //0AD7=3, read_string_from_file %1d% to %2d% size %3d% //IF and SET static OpcodeResult WINAPI opcode_0AD7(CRunningScript* thread) { - auto handle = READ_FILE_HANDLE_PARAM(); + OPCODE_READ_PARAM_FILE_HANDLE(handle); auto result = OPCODE_READ_PARAM_OUTPUT_VAR_STRING(); auto size = OPCODE_READ_PARAM_INT(); @@ -316,7 +323,7 @@ class FileSystemOperations //0AD8=2, write_string_to_file %1d% from %2d% //IF and SET static OpcodeResult WINAPI 
opcode_0AD8(CRunningScript* thread) { - auto handle = READ_FILE_HANDLE_PARAM(); + OPCODE_READ_PARAM_FILE_HANDLE(handle); OPCODE_READ_PARAM_STRING(text); auto ok = File::writeString(handle, text); @@ -334,7 +341,7 @@ class FileSystemOperations //0AD9=-1,write_formated_text %2d% to_file %1d% static OpcodeResult WINAPI opcode_0AD9(CRunningScript* thread) { - auto handle = READ_FILE_HANDLE_PARAM(); + OPCODE_READ_PARAM_FILE_HANDLE(handle); OPCODE_READ_PARAM_STRING(format); static char text[4 * MAX_STR_LEN]; CLEO_ReadParamsFormatted(thread, format, text, MAX_STR_LEN); @@ -351,7 +358,7 @@ class FileSystemOperations //0ADA=-1, %3d% = scan_file %1d% format %2d% //IF and SET static OpcodeResult WINAPI opcode_0ADA(CRunningScript* thread) { - auto handle = READ_FILE_HANDLE_PARAM(); + OPCODE_READ_PARAM_FILE_HANDLE(handle); OPCODE_READ_PARAM_STRING(format); auto result = OPCODE_READ_PARAM_OUTPUT_VAR_ANY32(); @@ -650,7 +657,7 @@ class FileSystemOperations //2300=2,get_file_position %1d% store_to %2d% static OpcodeResult WINAPI opcode_2300(CRunningScript* thread) { - auto handle = READ_FILE_HANDLE_PARAM(); + OPCODE_READ_PARAM_FILE_HANDLE(handle); auto pos = File::getPos(handle); @@ -661,7 +668,7 @@ class FileSystemOperations //2301=3,read_block_from_file %1d% size %2d% buffer %3d% // IF and SET static OpcodeResult WINAPI opcode_2301(CRunningScript* thread) { - auto handle = READ_FILE_HANDLE_PARAM(); + OPCODE_READ_PARAM_FILE_HANDLE(handle); auto size = OPCODE_READ_PARAM_INT(); auto destination = OPCODE_READ_PARAM_PTR(); @@ -691,7 +698,7 @@ class FileSystemOperations //2302=3, write_block_to_file %1d% size %2d% address %3d% // IF and SET static OpcodeResult WINAPI opcode_2302(CRunningScript* thread) { - auto handle = READ_FILE_HANDLE_PARAM(); + OPCODE_READ_PARAM_FILE_HANDLE(handle); auto size = OPCODE_READ_PARAM_INT(); auto source = OPCODE_READ_PARAM_PTR(); From 16afb29b3b6bec5896aba8ef791a0ab5f9f40dbe Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sun, 21 Jul 2024 03:40:26 +0200 
Subject: [PATCH 174/216] Various fixes (#164) Fixed detection of variable strings as constants in get_var_pointer opcode Allowed argument count mismatch in call function opcodes in legacy mode --- .../MemoryOperations/MemoryOperations.cpp | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/cleo_plugins/MemoryOperations/MemoryOperations.cpp b/cleo_plugins/MemoryOperations/MemoryOperations.cpp index 444c82bb..8c8937b1 100644 --- a/cleo_plugins/MemoryOperations/MemoryOperations.cpp +++ b/cleo_plugins/MemoryOperations/MemoryOperations.cpp @@ -85,32 +85,30 @@ class MemoryOperations // opcodes 0AA5 - 0AA8 static OpcodeResult CallFunctionGeneric(CLEO::CRunningScript* thread, void* func, void* obj, int numArg, int numPop, bool returnArg) { - int nVarArg = CLEO_GetVarArgCount(thread); - if (numArg + returnArg != nVarArg) // and return argument + auto inputArgCount = (int)CLEO_GetVarArgCount(thread) - returnArg; // return slot not counted as input argument + + constexpr size_t Max_Args = 32; + if (inputArgCount > Max_Args) { - SHOW_ERROR("Declared %d input args, but provided %d in script %s\nScript suspended.", numArg, (int)nVarArg - returnArg, CLEO::ScriptInfoStr(thread).c_str()); + SHOW_ERROR("Provided more (%d) than supported (%d) arguments in script %s\nScript suspended.", inputArgCount, Max_Args, CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } - constexpr size_t Max_Args = 32; - if (nVarArg > Max_Args) + if (numArg != inputArgCount && !IsLegacyScript(thread)) // CLEO4 ignored param count missmatch (by providing zeros for missing) { - SHOW_ERROR("Provided more (%d) than supported (%d) arguments in script %s\nScript suspended.", nVarArg, Max_Args, CLEO::ScriptInfoStr(thread).c_str()); + SHOW_ERROR("Declared %d input args, but provided %d in script %s\nScript suspended.\n\nTo ignore this error, change the file extension from .cs to .cs4 and restart the game.", numArg, inputArgCount, CLEO::ScriptInfoStr(thread).c_str()); 
return thread->Suspend(); } static SCRIPT_VAR arguments[Max_Args] = { 0 }; - SCRIPT_VAR* arguments_end = arguments + numArg; constexpr size_t Max_Text_Params = 5; static char textParams[Max_Text_Params][MAX_STR_LEN]; size_t currTextParam = 0; - numPop *= 4; // bytes peer argument - // retrieve parameters auto scriptParams = CLEO_GetOpcodeParamsArray(); - for (size_t i = 0; i < (size_t)numArg; i++) + for (size_t i = 0; i < std::min(numArg, inputArgCount); i++) { auto& param = arguments[i]; @@ -128,7 +126,7 @@ class MemoryOperations // read result from 0@ */ param.pParam = CLEO_GetPointerToScriptVariable(thread); - } + } else { if (currTextParam >= Max_Text_Params) @@ -155,6 +153,8 @@ class MemoryOperations } } + SCRIPT_VAR* arguments_end = arguments + numArg; + numPop *= 4; // bytes peer argument DWORD result; _asm { @@ -542,7 +542,7 @@ class MemoryOperations static OpcodeResult __stdcall opcode_0AC7(CLEO::CRunningScript* thread) { auto resultType = thread->PeekDataType(); - if (!IsVariable(resultType) && IsVarString(resultType)) + if (!IsVariable(resultType) && !IsVarString(resultType)) { SHOW_ERROR("Input argument #%d expected to be variable, got constant in script %s\nScript suspended.", CLEO_GetParamsHandledCount(), ScriptInfoStr(thread).c_str()); return thread->Suspend(); From ea1f7ba4515896396705630a7e9e263a8109fe2b Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sun, 28 Jul 2024 15:16:21 +0200 Subject: [PATCH 175/216] Forbidden filesystem opcodes from accessing outside game locations. 
(#168) --- CHANGELOG.md | 1 + cleo_plugins/Audio/Audio.vcxproj | 1 - cleo_plugins/Audio/Audio.vcxproj.filters | 3 --- cleo_plugins/DebugUtils/DebugUtils.vcxproj | 1 - cleo_plugins/DebugUtils/DebugUtils.vcxproj.filters | 3 --- cleo_plugins/Text/CTextManager.cpp | 2 +- cleo_plugins/Text/Text.vcxproj | 1 - cleo_plugins/Text/Text.vcxproj.filters | 3 --- cleo_sdk/CLEO_Utils.h | 9 ++++++--- 9 files changed, 8 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1b27249f..22bcf066 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ - new opcode **2102 ([log_to_file](https://library.sannybuilder.com/#/sa/debug/2102))** - implemented support of opcodes **0662**, **0663** and **0664** (original Rockstar's script debugging opcodes. See DebugUtils.ini) - new [FileSystemOperations](https://github.com/cleolibrary/CLEO5/tree/master/cleo_plugins/FileSystemOperations) plugin + - forbidden scripts from accessing and changing any files outside game root or game settings directory - file related opcodes moved from CLEO core into separated plugin - opcode **0A9E ([write_to_file](https://library.sannybuilder.com/#/sa/file/0A9E))** now supports literal numbers and strings - fixed bug causing file stream opcodes not working correctly when read-write modes are used diff --git a/cleo_plugins/Audio/Audio.vcxproj b/cleo_plugins/Audio/Audio.vcxproj index 045b59e9..7893f49b 100644 --- a/cleo_plugins/Audio/Audio.vcxproj +++ b/cleo_plugins/Audio/Audio.vcxproj @@ -148,7 +148,6 @@ xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" - diff --git a/cleo_plugins/Audio/Audio.vcxproj.filters b/cleo_plugins/Audio/Audio.vcxproj.filters index 71b9e853..dfa912cc 100644 --- a/cleo_plugins/Audio/Audio.vcxproj.filters +++ b/cleo_plugins/Audio/Audio.vcxproj.filters @@ -41,9 +41,6 @@ - - plugin_sdk - diff --git a/cleo_plugins/DebugUtils/DebugUtils.vcxproj b/cleo_plugins/DebugUtils/DebugUtils.vcxproj index 8c4e9332..926927ce 100644 --- 
a/cleo_plugins/DebugUtils/DebugUtils.vcxproj +++ b/cleo_plugins/DebugUtils/DebugUtils.vcxproj @@ -125,7 +125,6 @@ xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" - diff --git a/cleo_plugins/DebugUtils/DebugUtils.vcxproj.filters b/cleo_plugins/DebugUtils/DebugUtils.vcxproj.filters index 699e95c5..339e6e43 100644 --- a/cleo_plugins/DebugUtils/DebugUtils.vcxproj.filters +++ b/cleo_plugins/DebugUtils/DebugUtils.vcxproj.filters @@ -3,9 +3,6 @@ - - sdk - sdk diff --git a/cleo_plugins/Text/CTextManager.cpp b/cleo_plugins/Text/CTextManager.cpp index 5bb88ba5..06117f9d 100644 --- a/cleo_plugins/Text/CTextManager.cpp +++ b/cleo_plugins/Text/CTextManager.cpp @@ -93,7 +93,7 @@ namespace CLEO void CTextManager::LoadFxts() { // create FXT directory if not present yet - FS::create_directory(FS::path(CFileMgr::ms_rootDirName).append("cleo\\cleo_text")); + FS::create_directory(FS::path(Gta_Root_Dir_Path).append("cleo\\cleo_text")); // load whole FXT files directory auto list = CLEO::CLEO_ListDirectory(nullptr, "cleo\\cleo_text\\*.fxt", false, true); diff --git a/cleo_plugins/Text/Text.vcxproj b/cleo_plugins/Text/Text.vcxproj index 86bc04e1..eaada92c 100644 --- a/cleo_plugins/Text/Text.vcxproj +++ b/cleo_plugins/Text/Text.vcxproj @@ -129,7 +129,6 @@ xcopy /Y "$(OutDir)$(TargetName).*" "$(GTA_SA_DIR)\cleo\cleo_plugins\" - diff --git a/cleo_plugins/Text/Text.vcxproj.filters b/cleo_plugins/Text/Text.vcxproj.filters index 5543202f..632f5cbf 100644 --- a/cleo_plugins/Text/Text.vcxproj.filters +++ b/cleo_plugins/Text/Text.vcxproj.filters @@ -31,9 +31,6 @@ plugin_sdk - - plugin_sdk - plugin_sdk diff --git a/cleo_sdk/CLEO_Utils.h b/cleo_sdk/CLEO_Utils.h index 3f50a9e4..0aa22c0a 100644 --- a/cleo_sdk/CLEO_Utils.h +++ b/cleo_sdk/CLEO_Utils.h @@ -13,7 +13,6 @@ #pragma once #include "CLEO.h" -#include "CFileMgr.h" // from GTA Plugin SDK #include "CPools.h" // from GTA Plugin SDK #include "shellapi.h" // game window minimize/maximize support #include @@ -75,6 +74,9 @@ 
namespace CLEO OPCODE_WRITE_PARAM_PTR(value) // memory address */ + static const char* Gta_Root_Dir_Path = (char*)0x00B71AE0; + static const char* Gta_User_Dir_Path = (char*)0x00C92368; + static bool IsLegacyScript(CLEO::CRunningScript* thread) { return CLEO_GetScriptVersion(thread) < CLEO_VER_5; @@ -83,7 +85,7 @@ namespace CLEO // this plugin's config file static std::string GetConfigFilename() { - std::string configFile = CFileMgr::ms_rootDirName; + std::string configFile = Gta_Root_Dir_Path; if (!configFile.empty() && configFile.back() != '\\') configFile.push_back('\\'); configFile += "cleo\\cleo_plugins\\" TARGET_NAME ".ini"; @@ -546,7 +548,8 @@ namespace CLEO #define OPCODE_READ_PARAMS_FORMATTED(_format, _varName) char _varName[2 * MAX_STR_LEN + 1]; char* _varName##Ok = CLEO_ReadParamsFormatted(thread, _format, _varName, sizeof(_varName)); - #define OPCODE_READ_PARAM_FILEPATH(_varName) char _buff_##_varName[512]; const char* ##_varName = _readParamText(thread, _buff_##_varName, 512); if(##_varName != nullptr) ##_varName = _buff_##_varName; if(_paramWasString()) CLEO_ResolvePath(thread, _buff_##_varName, 512); else return OpcodeResult::OR_INTERRUPT; + #define OPCODE_READ_PARAM_FILEPATH(_varName) char _buff_##_varName[512]; const char* ##_varName = _readParamText(thread, _buff_##_varName, 512); if(##_varName != nullptr) ##_varName = _buff_##_varName; if(_paramWasString()) CLEO_ResolvePath(thread, _buff_##_varName, 512); else return OpcodeResult::OR_INTERRUPT; \ + if(_strnicmp(##_varName, Gta_Root_Dir_Path, strlen(Gta_Root_Dir_Path)) != 0 && _strnicmp(##_varName, Gta_User_Dir_Path, strlen(Gta_User_Dir_Path)) != 0) { SHOW_ERROR("Forbidden file path '%s' outside game directories in script %s \nScript suspended.", ##_varName, ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_READ_PARAM_PTR() _readParam(thread).pParam; \ if (!_paramWasInt()) { SHOW_ERROR("Input argument %s expected to be integer, got %s in script %s\nScript suspended.", 
GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ From 5e81b14cbce758f21f68cf4f45f752ac73e20f2e Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sun, 28 Jul 2024 15:25:42 +0200 Subject: [PATCH 176/216] Malware detection fixes 3 (#166) Removed duplicated log traces code. Fixed need for extra checks when calling FuncScriptDeleteDelegate. Restored proper file time stamp generation. Removed use of GetLocalTime. Removed hot reloading in CModuleSystem. Switched to simdjson library for loading opcodes database. Removed opcode database loading thread. Compiler settings updated. Output signing disabled. --- .github/workflows/main.yml | 36 +- .github/workflows/test.yml | 36 +- CLEO5.vcxproj | 22 +- CLEO5.vcxproj.filters | 44 +- source/CCustomOpcodeSystem.cpp | 29 +- source/CDebug.cpp | 69 +- source/CDebug.h | 13 +- source/CModuleSystem.cpp | 107 - source/CModuleSystem.h | 20 +- source/CleoBase.h | 3 +- source/OpcodeInfoDatabase.cpp | 124 +- source/OpcodeInfoDatabase.h | 4 +- source/ScmFunction.h | 1 - third-party/SimpleJSON/README.md | 98 - third-party/SimpleJSON/json.hpp | 650 - third-party/simdjson/README.md | 227 + third-party/simdjson/simdjson.cpp | 55928 +++++++++++++ third-party/simdjson/simdjson.h | 117769 +++++++++++++++++++++++++++ 18 files changed, 174071 insertions(+), 1109 deletions(-) delete mode 100644 third-party/SimpleJSON/README.md delete mode 100644 third-party/SimpleJSON/json.hpp create mode 100644 third-party/simdjson/README.md create mode 100644 third-party/simdjson/simdjson.cpp create mode 100644 third-party/simdjson/simdjson.h diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8a440e37..4f89b041 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -31,15 +31,15 @@ jobs: set PLUGIN_SDK_DIR=%GITHUB_WORKSPACE%\third-party\plugin-sdk msbuild -m CLEO5.sln /property:Configuration=Release /property:Platform=GTASA - - 
name: Core - Sign - uses: x87/code-sign-action@develop - with: - certificate: '${{ secrets.DIG_KEY_CERT }}' - password: '${{ secrets.DIG_KEY_PWD }}' - certificatename: 'Seemann' - description: 'CLEO 5' - timestampUrl: 'http://timestamp.digicert.com' - filename: './.output/Release/cleo.asi' +# - name: Core - Sign +# uses: x87/code-sign-action@develop +# with: +# certificate: '${{ secrets.DIG_KEY_CERT }}' +# password: '${{ secrets.DIG_KEY_PWD }}' +# certificatename: 'Seemann' +# description: 'CLEO 5' +# timestampUrl: 'http://timestamp.digicert.com' +# filename: './.output/Release/cleo.asi' - name: Core - VirusTotal Scan uses: crazy-max/ghaction-virustotal@v4 @@ -53,15 +53,15 @@ jobs: set PLUGIN_SDK_DIR=%GITHUB_WORKSPACE%\third-party\plugin-sdk msbuild -m cleo_plugins/CLEO_Plugins.sln /property:Configuration=Release /property:Platform=x86 - - name: Plugins - Sign - uses: x87/code-sign-action@develop - with: - certificate: '${{ secrets.DIG_KEY_CERT }}' - password: '${{ secrets.DIG_KEY_PWD }}' - certificatename: 'Seemann' - description: 'CLEO 5 Plugin' - timestampUrl: 'http://timestamp.digicert.com' - folder: './cleo_plugins/.output' +# - name: Plugins - Sign +# uses: x87/code-sign-action@develop +# with: +# certificate: '${{ secrets.DIG_KEY_CERT }}' +# password: '${{ secrets.DIG_KEY_PWD }}' +# certificatename: 'Seemann' +# description: 'CLEO 5 Plugin' +# timestampUrl: 'http://timestamp.digicert.com' +# folder: './cleo_plugins/.output' - name: Plugins - VirusTotal Scan uses: crazy-max/ghaction-virustotal@v4 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 39f32e00..5ea22bac 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -27,15 +27,15 @@ jobs: set PLUGIN_SDK_DIR=%GITHUB_WORKSPACE%\third-party\plugin-sdk msbuild -m CLEO5.sln /property:Configuration=Release /property:Platform=GTASA - - name: Core - Sign - uses: x87/code-sign-action@develop - with: - certificate: '${{ secrets.DIG_KEY_CERT }}' - password: '${{ 
secrets.DIG_KEY_PWD }}' - certificatename: 'Seemann' - description: 'CLEO 5' - timestampUrl: 'http://timestamp.digicert.com' - filename: './.output/Release/cleo.asi' +# - name: Core - Sign +# uses: x87/code-sign-action@develop +# with: +# certificate: '${{ secrets.DIG_KEY_CERT }}' +# password: '${{ secrets.DIG_KEY_PWD }}' +# certificatename: 'Seemann' +# description: 'CLEO 5' +# timestampUrl: 'http://timestamp.digicert.com' +# filename: './.output/Release/cleo.asi' - name: Core - VirusTotal Scan uses: crazy-max/ghaction-virustotal@v4 @@ -49,15 +49,15 @@ jobs: set PLUGIN_SDK_DIR=%GITHUB_WORKSPACE%\third-party\plugin-sdk msbuild -m cleo_plugins/CLEO_Plugins.sln /property:Configuration=Release /property:Platform=x86 - - name: Plugins - Sign - uses: x87/code-sign-action@develop - with: - certificate: '${{ secrets.DIG_KEY_CERT }}' - password: '${{ secrets.DIG_KEY_PWD }}' - certificatename: 'Seemann' - description: 'CLEO 5 Plugin' - timestampUrl: 'http://timestamp.digicert.com' - folder: './cleo_plugins/.output' +# - name: Plugins - Sign +# uses: x87/code-sign-action@develop +# with: +# certificate: '${{ secrets.DIG_KEY_CERT }}' +# password: '${{ secrets.DIG_KEY_PWD }}' +# certificatename: 'Seemann' +# description: 'CLEO 5 Plugin' +# timestampUrl: 'http://timestamp.digicert.com' +# folder: './cleo_plugins/.output' - name: Plugins - VirusTotal Scan uses: crazy-max/ghaction-virustotal@v4 diff --git a/CLEO5.vcxproj b/CLEO5.vcxproj index fd6b68af..3634d5bb 100644 --- a/CLEO5.vcxproj +++ b/CLEO5.vcxproj @@ -72,6 +72,10 @@ NotUsing NotUsing + + NotUsing + NotUsing + @@ -94,6 +98,7 @@ + @@ -114,14 +119,14 @@ DynamicLibrary false - MultiByte + Unicode v143 true DynamicLibrary true - MultiByte + Unicode v143 @@ -139,14 +144,16 @@ $(SolutionDir).output\.obj\$(Configuration)\ CLEO .asi - $(PLUGIN_SDK_DIR)\shared\;$(PLUGIN_SDK_DIR)\shared\game\;$(SolutionDir)third-party\SimpleJSON;$(IncludePath) + 
$(PLUGIN_SDK_DIR)\shared\;$(PLUGIN_SDK_DIR)\shared\game\;$(SolutionDir)third-party\simdjson;$(IncludePath) + false + false $(SolutionDir).output\$(Configuration)\ $(SolutionDir).output\.obj\$(Configuration)\ CLEO .asi - $(PLUGIN_SDK_DIR)\shared\;$(PLUGIN_SDK_DIR)\shared\game\;$(SolutionDir)third-party\SimpleJSON;$(IncludePath) + $(PLUGIN_SDK_DIR)\shared\;$(PLUGIN_SDK_DIR)\shared\game\;$(SolutionDir)third-party\simdjson;$(IncludePath) $(GTA_SA_DIR)\gta_sa.exe @@ -168,6 +175,10 @@ Create stdcpp17 None + Speed + false + false + false true @@ -178,7 +189,8 @@ Windows $(SolutionDir)source\cleo.def false - /Brepro %(AdditionalOptions) + false + false xcopy /Y "$(OutDir)$(TargetName).lib" "$(SolutionDir)cleo_sdk\" diff --git a/CLEO5.vcxproj.filters b/CLEO5.vcxproj.filters index 827563b3..ca3d4e19 100644 --- a/CLEO5.vcxproj.filters +++ b/CLEO5.vcxproj.filters @@ -7,9 +7,6 @@ {d188d452-fbc6-48b5-bd49-d4036c989109} - - {5cead5cc-9a75-4d2e-99b5-ebbc8f9d6d86} - {3104a2cb-d9c5-4eb5-9910-cf77d903db30} @@ -22,6 +19,15 @@ {9c8be703-c930-47b1-b0cb-7c4b80922a48} + + {d2916069-8bff-46e6-9a07-6b845c4361dd} + + + {2fba67c2-5ab9-4f75-82ed-e1024b272094} + + + {5cead5cc-9a75-4d2e-99b5-ebbc8f9d6d86} + @@ -31,31 +37,31 @@ source - plugin_sdk + third_party\plugin_sdk - plugin_sdk + third_party\plugin_sdk - plugin_sdk + third_party\plugin_sdk - plugin_sdk + third_party\plugin_sdk - plugin_sdk + third_party\plugin_sdk - plugin_sdk + third_party\plugin_sdk - plugin_sdk + third_party\plugin_sdk - plugin_sdk + third_party\plugin_sdk - plugin_sdk + third_party\plugin_sdk source\utils @@ -94,22 +100,25 @@ source\extensions - plugin_sdk + third_party\plugin_sdk source\utils - plugin_sdk + third_party\plugin_sdk source\extensions - plugin_sdk + third_party\plugin_sdk - plugin_sdk + third_party\plugin_sdk + + + third_party\simdjson @@ -173,6 +182,9 @@ source\utils + + third_party\simdjson + diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 067292e3..ca08a3ca 100644 --- 
a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -6,7 +6,6 @@ #include "CCheat.h" #include "CModelInfo.h" -#include #include #include #include @@ -70,30 +69,8 @@ namespace CLEO template void operator+=(FuncScriptDeleteDelegateT mFunc) { funcs.push_back(mFunc); } template void operator-=(FuncScriptDeleteDelegateT mFunc) { funcs.erase(std::remove(funcs.begin(), funcs.end(), mFunc), funcs.end()); } void operator()(CRunningScript *script) - { - for (auto& f : funcs) - { - // check if function pointer lays within any of currently loaded modules (.asi or .cleo plugins) - void* ptr = f; - MODULEENTRY32 module; - module.dwSize = sizeof(MODULEENTRY32); - HANDLE snapshot = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE, GetCurrentProcessId()); - Module32First(snapshot, &module); - if (snapshot != INVALID_HANDLE_VALUE) - { - size_t count = 0; - do - { - if(ptr >= module.modBaseAddr && ptr <= (module.modBaseAddr + module.modBaseSize)) - { - f(script); - break; - } - } while (Module32Next(snapshot, &module)); - CloseHandle(snapshot); - } - } - + { + for (auto& f : funcs) f(script); } }; ScriptDeleteDelegate scriptDeleteDelegate; @@ -749,7 +726,6 @@ namespace CLEO // handle program flow scmFunc->Return(cs); // jump back to cleo_call, right after last input param. 
Return slot var args starts here - if (scmFunc->moduleExportRef != nullptr) GetInstance().ModuleSystem.ReleaseModuleRef((char*)scmFunc->moduleExportRef); // exiting export - release module delete scmFunc; if (returnArgs) @@ -1021,7 +997,6 @@ namespace CLEO SHOW_ERROR("Not found module '%s' export '%s', requested by opcode [0AB1] in script %s", modulePath.c_str(), moduleTxt.c_str(), ((CCustomScript*)thread)->GetInfoStr().c_str()); return thread->Suspend(); } - scmFunc->moduleExportRef = scriptRef.base; // to be released on return reinterpret_cast(thread)->SetScriptFileDir(FS::path(modulePath).parent_path().string().c_str()); reinterpret_cast(thread)->SetScriptFileName(FS::path(modulePath).filename().string().c_str()); diff --git a/source/CDebug.cpp b/source/CDebug.cpp index 0ac92941..91c3a626 100644 --- a/source/CDebug.cpp +++ b/source/CDebug.cpp @@ -23,33 +23,7 @@ std::string stringPrintf(const char* format, ...) return result; } -void CDebug::Trace(eLogLevel level, const char* format, ...) -{ - va_list args; - va_start(args, format); - TraceVArg(level, format, args); - va_end(args); -} - -void CDebug::Trace(const CLEO::CRunningScript* thread, CLEO::eLogLevel level, const char* format, ...) 
-{ - if(thread != nullptr && thread->IsCustom()) - { - const auto cs = (CCustomScript*)thread; - - if(cs->GetCompatibility() < CLEO_VER_5) - { - return; // do not log this in older versions - } - } - - va_list args; - va_start(args, format); - TraceVArg(level, format, args); - va_end(args); -} - -const char* CDebug::TraceVArg(CLEO::eLogLevel level, const char* format, va_list args) +void CDebug::Trace(CLEO::eLogLevel level, const char* msg) { std::lock_guard guard(mutex); @@ -57,12 +31,15 @@ const char* CDebug::TraceVArg(CLEO::eLogLevel level, const char* format, va_list // time stamp SYSTEMTIME t; - GetLocalTime(&t); + + //GetLocalTime(&t); + void (__stdcall * GTA_GetLocalTime)(LPSYSTEMTIME lpSystemTime) = memory_pointer(0x0081E514); // use ingame function instead as GetLocalTime seems to be considered suspicious by some AV software + GTA_GetLocalTime(&t); + sprintf(szBuf, "%02d/%02d/%04d %02d:%02d:%02d.%03d ", t.wDay, t.wMonth, t.wYear, t.wHour, t.wMinute, t.wSecond, t.wMilliseconds); char* stampEnd = szBuf + strlen(szBuf); - // put params into format - vsprintf(stampEnd, format, args); + strcpy(stampEnd, msg); // output to file if(m_hFile.good()) @@ -84,42 +61,12 @@ const char* CDebug::TraceVArg(CLEO::eLogLevel level, const char* format, va_list ((callback*)func)(level, stampEnd); } } - - return stampEnd; -} - -void CDebug::Error(const char* format, ...) 
-{ - va_list args; - va_start(args, format); - auto msg = TraceVArg(eLogLevel::Error, format, args); - va_end(args); - - QUERY_USER_NOTIFICATION_STATE pquns; - SHQueryUserNotificationState(&pquns); - bool fullscreen = (pquns == QUNS_BUSY) || (pquns == QUNS_RUNNING_D3D_FULL_SCREEN) || (pquns == QUNS_PRESENTATION_MODE); - - auto mainWnd = RsGlobal.ps->window; - - if(fullscreen) - { - PostMessage(mainWnd, WM_SYSCOMMAND, SC_MINIMIZE, 0); - ShowWindow(mainWnd, SW_MINIMIZE); - } - - MessageBox(mainWnd, msg, "CLEO error", MB_SYSTEMMODAL | MB_TOPMOST | MB_ICONERROR | MB_OK); - - if (fullscreen) - { - PostMessage(mainWnd, WM_SYSCOMMAND, SC_RESTORE, 0); - ShowWindow(mainWnd, SW_RESTORE); - } } extern "C" { void WINAPI CLEO_Log(eLogLevel level, const char* msg) { - Debug.Trace(level, "%s", msg); + Debug.Trace(level, msg); } } diff --git a/source/CDebug.h b/source/CDebug.h index 89dd1ae2..c1da6c0e 100644 --- a/source/CDebug.h +++ b/source/CDebug.h @@ -16,25 +16,22 @@ class CDebug Trace(CLEO::eLogLevel::Default, "Log started."); #ifdef _DEBUG - Trace(CLEO::eLogLevel::Default, "CLEO v%s DEBUG", CLEO_VERSION_STR); + CLEO::Trace(CLEO::eLogLevel::Default, "CLEO v%s DEBUG", CLEO_VERSION_STR); #else - Trace(CLEO::eLogLevel::Default, "CLEO v%s", CLEO_VERSION_STR); + CLEO::Trace(CLEO::eLogLevel::Default, "CLEO v%s", CLEO_VERSION_STR); #endif } ~CDebug() { - Trace(CLEO::eLogLevel::Default, "Log finished."); + CLEO::Trace(CLEO::eLogLevel::Default, "Log finished."); } - void Trace(CLEO::eLogLevel level, const char* format, ...); - void Trace(const CLEO::CRunningScript* thread, CLEO::eLogLevel level, const char* format, ...); - void Error(const char* format, ...); + void Trace(CLEO::eLogLevel level, const char* msg); private: std::mutex mutex; std::ofstream m_hFile; - const char* TraceVArg(CLEO::eLogLevel level, const char* format, va_list args); -}; + }; extern CDebug Debug; diff --git a/source/CModuleSystem.cpp b/source/CModuleSystem.cpp index 0e7d5597..66078032 100644 --- 
a/source/CModuleSystem.cpp +++ b/source/CModuleSystem.cpp @@ -3,7 +3,6 @@ #include "CModuleSystem.h" #include "FileEnumerator.h" -#include #include using namespace CLEO; @@ -35,10 +34,6 @@ const ScriptDataRef CModuleSystem::GetExport(std::string modulePath, std::string auto& module = it->second; auto e = module.GetExport(std::string(exportName)); - if (e.Valid()) - { - module.refCount++; - } return e; } @@ -72,34 +67,6 @@ bool CModuleSystem::LoadCleoModules() return LoadDirectory(path.c_str()); } -void CLEO::CModuleSystem::AddModuleRef(const char* baseIP) -{ - for (auto& it : modules) - { - auto& module = it.second; - - if (module.data.data() == baseIP) - { - module.refCount++; - return; - } - } -} - -void CLEO::CModuleSystem::ReleaseModuleRef(const char* baseIP) -{ - for (auto& it : modules) - { - auto& module = it.second; - - if (module.data.data() == baseIP) - { - module.refCount--; - return; - } - } -} - void CModuleSystem::NormalizePath(std::string& path) { for (char& c : path) @@ -113,73 +80,11 @@ void CModuleSystem::NormalizePath(std::string& path) }; } -void CModuleSystem::CModule::Update() -{ - while (updateActive) - { - if (!updateNeeded) - { - FS::file_time_type time; - try - { - time = FS::last_write_time(filepath); - } - catch (...) - { - time = {}; - } - - // file not exists or up to date - if (time == FS::file_time_type{} || time == fileTime) - { - // query files once a second - for(size_t i = 0; i < 100 && updateActive; i++) - std::this_thread::sleep_for(std::chrono::milliseconds(10)); - - continue; - } - - updateNeeded = true; - } - - if (refCount != 0) - { - continue; // module currently in use - } - - auto file = filepath; - auto result = LoadFromFile(file.c_str()); - updateNeeded = false; - Debug.Trace(eLogLevel::Debug, "Module reload %s '%s'", result ? 
"OK" : "FAILED", file.c_str()); - } -} - -CModuleSystem::CModule::CModule() : - updateThread(&CModuleSystem::CModule::Update, this) -{ -} - -CModuleSystem::CModule::~CModule() -{ - updateActive = false; - updateThread.join(); -} - void CModuleSystem::CModule::Clear() { - if (refCount != 0) - { - TRACE("Warning! Module '%s' cleared despite in use %d time(s)", filepath.c_str(), refCount.load()); - } - - std::lock_guard guard(updateMutex); - filepath.clear(); data.clear(); exports.clear(); - - refCount = 0; - fileTime = {}; } const char* CModuleSystem::CModule::GetFilepath() const @@ -190,20 +95,8 @@ const char* CModuleSystem::CModule::GetFilepath() const bool CModuleSystem::CModule::LoadFromFile(const char* path) { Clear(); - - std::lock_guard guard(updateMutex); - filepath = path; - try - { - fileTime = FS::last_write_time(path); - } - catch(...) - { - fileTime = {}; - } - std::ifstream file(path, std::ios::binary); if (!file.good()) { diff --git a/source/CModuleSystem.h b/source/CModuleSystem.h index b78aee14..869a7cd8 100644 --- a/source/CModuleSystem.h +++ b/source/CModuleSystem.h @@ -1,8 +1,5 @@ #pragma once -#include #include -#include -#include namespace CLEO { @@ -32,10 +29,6 @@ namespace CLEO bool LoadDirectory(const char* const path); // all modules in directory bool LoadCleoModules(); // all in cleo\cleo_modules - // marking modules usage - void AddModuleRef(const char* baseIP); - void ReleaseModuleRef(const char* baseIP); - private: static void NormalizePath(std::string& path); @@ -58,19 +51,10 @@ namespace CLEO std::vector data; std::map exports; - // hot reloading when source file modified - std::atomic refCount = 0; - FS::file_time_type fileTime; // last write time of source file - void Update(); - std::atomic updateActive = true; - std::atomic updateNeeded = false; - std::mutex updateMutex; - std::thread updateThread; - public: - CModule(); + CModule() = default; CModule(const CModule&) = delete; // no copying - ~CModule(); + ~CModule() = default; 
void Clear(); const char* GetFilepath() const; diff --git a/source/CleoBase.h b/source/CleoBase.h index 06cb7f25..8b5180b8 100644 --- a/source/CleoBase.h +++ b/source/CleoBase.h @@ -22,14 +22,15 @@ namespace CLEO std::map> m_callbacks; public: + // order here defines init and deinit and order! CDmaFix DmaFix; CGameMenu GameMenu; CCodeInjector CodeInjector; + CPluginSystem PluginSystem; CGameVersionManager VersionManager; CScriptEngine ScriptEngine; CCustomOpcodeSystem OpcodeSystem; CModuleSystem ModuleSystem; - CPluginSystem PluginSystem; OpcodeInfoDatabase OpcodeInfoDb; int saveSlot = -1; // -1 if not loaded from save diff --git a/source/OpcodeInfoDatabase.cpp b/source/OpcodeInfoDatabase.cpp index 335fc3fa..438a94f5 100644 --- a/source/OpcodeInfoDatabase.cpp +++ b/source/OpcodeInfoDatabase.cpp @@ -1,124 +1,96 @@ #include "stdafx.h" #include "OpcodeInfoDatabase.h" -#include "json.hpp" -#include +#include "simdjson.h" #include -#include using namespace std; -using namespace json; +using namespace simdjson; -bool OpcodeInfoDatabase::_Load(const std::string filepath) +bool OpcodeInfoDatabase::Load(const char* filepath) { Clear(); - ifstream file(filepath.c_str()); - if (file.fail()) - { - TRACE("Failed to open opcodes database '%s' file.", filepath.c_str()); - return false; - } - - file.seekg(0, ifstream::end); - auto size = file.tellg(); - file.seekg(0, ifstream::beg); + dom::parser parser; + dom::element root; - if (size > 8 * 1024 * 1024) // 8MB is reasonable json file size upper limit - { - TRACE("Opcodes database '%s' file too large to load.", filepath.c_str()); + auto error = parser.load(filepath).get(root); + if (error) + { + TRACE("Failed to parse opcodes database '%s' file. 
Code %d", filepath, error); return false; } - string text; - text.resize((size_t)size); - file.read(text.data(), size); - file.close(); - - if (file.fail()) + const char* version; + if (root["meta"]["version"].get_c_str().get(version)) { - TRACE("Error while reading opcodes database '%s' file.", filepath.c_str()); + TRACE("Invalid opcodes database '%s' file.", filepath); return false; } - JSON root; - try - { - root = JSON::Load(text.c_str()); - } - catch (const exception& ex) + dom::array ext; + if (root["extensions"].get_array().get(ext)) { - TRACE("Error while parsing opcodes database '%s' file:\n%s", filepath.c_str(), ex.what()); + TRACE("Invalid opcodes database '%s' file.", filepath); return false; } - if (root.IsNull() || root["extensions"].JSONType() != JSON::Class::Array) + for (auto& e : ext) { - TRACE("Invalid opcodes database '%s' file.", filepath.c_str()); - return false; - } + Extension extension; - for (auto& e : root["extensions"].ArrayRange()) - { - auto name = e["name"]; - auto commands = e["commands"]; - if (name.JSONType() != JSON::Class::String || commands.JSONType() != JSON::Class::Array) + const char* name; + if (e["name"].get_c_str().get(name)) { continue; // invalid extension } + extension.name = name; - Extension extension; - extension.name = name.ToString(); + dom::array commands; + if (e["commands"].get_array().get(commands)) + { + continue; // invalid extension + } - for (auto& c : commands.ArrayRange()) + for (auto& c : commands) { - auto commandId = c["id"]; - auto commandName = c["name"]; - if (commandId.JSONType() != JSON::Class::String || commandName.JSONType() != JSON::Class::String) + bool unsupported; + if (!c["attrs"]["is_unsupported"].get_bool().get(unsupported) && unsupported) + { + continue; // command defined as unsupported + } + + const char* commandId; + if (c["id"].get_c_str().get(commandId)) { continue; // invalid command } - auto attributes = c["attrs"]; - if (attributes.JSONType() == JSON::Class::Object) + const char* 
commandName; + if (c["name"].get_c_str().get(commandName)) { - auto unsupported = attributes["is_unsupported"]; - if (unsupported.JSONType() == JSON::Class::Boolean && unsupported.ToBool()) - { - continue; // command defined as unsupported - } + continue; // invalid command } - auto idLong = stoul(commandId.ToString(), nullptr, 16); + auto idLong = stoul(commandId, nullptr, 16); if (idLong > 0x7FFF) { continue; // opcode out of bounds } auto id = (uint16_t)idLong; - extension.opcodes.emplace(piecewise_construct, make_tuple(id), make_tuple(id, commandName.ToString())); + extension.opcodes.emplace(piecewise_construct, make_tuple(id), make_tuple(id, commandName)); auto& opcode = extension.opcodes.at(id); // read arguments info - auto inputArgs = c["input"]; - if (inputArgs.JSONType() == JSON::Class::Array) - { - for (auto& p : inputArgs.ArrayRange()) - { - if (p.JSONType() == JSON::Class::Object && p["name"].JSONType() == JSON::Class::String) - { - opcode.arguments.emplace_back(p["name"].ToString().c_str()); - } - } - } - - auto outputArgs = c["output"]; - if (outputArgs.JSONType() == JSON::Class::Array) + dom::array inputArgs; + if (!c["input"].get_array().get(inputArgs)) { - for (auto& p : outputArgs.ArrayRange()) + for (auto& p : inputArgs) { - if (p.JSONType() == JSON::Class::Object && p["name"].JSONType() == JSON::Class::String) + const char* argName; + if (!p["name"].get_c_str().get(argName)) { - opcode.arguments.emplace_back(p["name"].ToString().c_str()); + opcode.arguments.emplace_back(argName); } } } @@ -130,6 +102,7 @@ bool OpcodeInfoDatabase::_Load(const std::string filepath) } } + TRACE("Opcodes database version %s loaded from '%s'", version, filepath); ok = true; return true; } @@ -140,11 +113,6 @@ void OpcodeInfoDatabase::Clear() extensions.clear(); } -void OpcodeInfoDatabase::Load(const char* filepath) -{ - thread(&OpcodeInfoDatabase::_Load, this, std::string(filepath)).detach(); // asynchronic execute -} - const char* 
OpcodeInfoDatabase::GetExtensionName(uint16_t opcode) const { if (ok) diff --git a/source/OpcodeInfoDatabase.h b/source/OpcodeInfoDatabase.h index 663e2e14..ea08713f 100644 --- a/source/OpcodeInfoDatabase.h +++ b/source/OpcodeInfoDatabase.h @@ -37,13 +37,11 @@ class OpcodeInfoDatabase std::atomic ok = false; std::map extensions; - bool _Load(const std::string filepath); - public: OpcodeInfoDatabase() = default; void Clear(); - void Load(const char* filepath); // triggers asynchronic load + bool Load(const char* filepath); // triggers asynchronic load const char* GetExtensionName(uint16_t opcode) const; // nullptr if not found const char* GetExtensionName(const char* commandName) const; // nullptr if not found diff --git a/source/ScmFunction.h b/source/ScmFunction.h index 3a7988d2..f176a394 100644 --- a/source/ScmFunction.h +++ b/source/ScmFunction.h @@ -29,7 +29,6 @@ namespace CLEO bool savedCondResult; eLogicalOperation savedLogicalOp; bool savedNotFlag; - void* moduleExportRef = 0; // modules switching. Points to modules baseIP in case if this is export call std::string savedScriptFileDir; // modules switching std::string savedScriptFileName; // modules switching diff --git a/third-party/SimpleJSON/README.md b/third-party/SimpleJSON/README.md deleted file mode 100644 index 26f347d4..00000000 --- a/third-party/SimpleJSON/README.md +++ /dev/null @@ -1,98 +0,0 @@ -# SimpleJSON -Simple C++ JSON library - -# Source -https://github.com/nbsdx/SimpleJSON - -## License -Do what the fuck you want public license - -## About -SimpleJSON is a lightweight JSON library for exporting data in JSON format from C++. By taking advantage of templates and operator overloading on the backend, you're able to create and work with JSON objects right away, just as you would expect from a language such as JavaScript. SimpleJSON is a single C++ Header file, "json.hpp". Feel free to download this file on its own, and include it in your project. No other requirements! 
- -#### Platforms -SimpleJSON should work on any platform; it's only requirement is a C++11 compatible compiler, as it make heavy use of the C++11 move semantics, and variadic templates. The tests are tailored for linux, but could be ported to any platform with python support and a C++11 compiler. - -## API -You can find the API [over here](API.md). For now it's just a Markdown file with C++ syntax highlighting, but it's better than nothing! - -## Upcoming Features -SimpleJSON is still missing some features, which I hope to get done soon! -* Write more test cases to cover all major components( mostly parsing ) - -One of the biggests goals for SimpleJSON is for it to be lightweight, and small. Having complicated logic isn't bad, but it bloats the codebase in most cases. I'd like to keep things small rather than put in big features that take a ton of space. - -If you run into any bugs, or see that I'm missing a featuer, please submit an issue through GitHub and I'll respond as soon as I can! - -## Example -More examples can be found in the 'examples' directory. Check out [the API](API.md) for a full list of functions. - -```cpp -#include "json.hpp" - -int main() { - json::JSON obj; - // Create a new Array as a field of an Object. - obj["array"] = json::Array( true, "Two", 3, 4.0 ); - // Create a new Object as a field of another Object. 
- obj["obj"] = json::Object(); - // Assign to one of the inner object's fields - obj["obj"]["inner"] = "Inside"; - - // We don't need to specify the type of the JSON object: - obj["new"]["some"]["deep"]["key"] = "Value"; - obj["array2"].append( false, "three" ); - - // We can also parse a string into a JSON object: - obj["parsed"] = JSON::Load( "[ { \"Key\" : \"Value\" }, false ]" ); - - std::cout << obj << std::endl; -} -``` -Output: -``` -{ - "array" : [true, "Two", 3, 4.000000], - "array2" : [false, "three"], - "new" : { - "some" : { - "deep" : { - "key" : "Value" - } - } - }, - "obj" : { - "inner" : "Inside" - }, - "parsed" : [{ - "Key" : "Value" - }, false] -} -``` - -This example can also be written another way: -```cpp -#include "json.hpp" -#include - -using json::JSON; - -int main() { - JSON obj = { - "array", json::Array( true, "Two", 3, 4.0 ), - "obj", { - "inner", "Inside" - }, - "new", { - "some", { - "deep", { - "key", "Value" - } - } - }, - "array2", json::Array( false, "three" ) - }; - - std::cout << obj << std::endl; -``` -Sadly, we don't have access to the : character in C++, so we can't use that to seperate key-value pairs, but by using commas, we can achieve a very similar effect. The other point you might notice, is that we have to explictly create arrays. This is a limitation of C++'s operator overloading rules, so we can't use the [] operator to define the array :( I'm looking into ways to make this smoother. diff --git a/third-party/SimpleJSON/json.hpp b/third-party/SimpleJSON/json.hpp deleted file mode 100644 index 341a292d..00000000 --- a/third-party/SimpleJSON/json.hpp +++ /dev/null @@ -1,650 +0,0 @@ -// Miran: I edited it to make numbers type "long long" instead of just "long", as sa.json won't load otherwise. 
- -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace json { - -using std::map; -using std::deque; -using std::string; -using std::enable_if; -using std::initializer_list; -using std::is_same; -using std::is_convertible; -using std::is_integral; -using std::is_floating_point; - -namespace { - string json_escape( const string &str ) { - string output; - for( unsigned i = 0; i < str.length(); ++i ) - switch( str[i] ) { - case '\"': output += "\\\""; break; - case '\\': output += "\\\\"; break; - case '\b': output += "\\b"; break; - case '\f': output += "\\f"; break; - case '\n': output += "\\n"; break; - case '\r': output += "\\r"; break; - case '\t': output += "\\t"; break; - default : output += str[i]; break; - } - return std::move( output ); - } -} - -class JSON -{ - union BackingData { - BackingData( double d ) : Float( d ){} - BackingData( long long l ) : Int( l ){} - BackingData( bool b ) : Bool( b ){} - BackingData( string s ) : String( new string( s ) ){} - BackingData() : Int( 0 ){} - - deque *List; - map *Map; - string *String; - double Float; - long long Int; - bool Bool; - } Internal; - - public: - enum class Class { - Null, - Object, - Array, - String, - Floating, - Integral, - Boolean - }; - - template - class JSONWrapper { - Container *object; - - public: - JSONWrapper( Container *val ) : object( val ) {} - JSONWrapper( std::nullptr_t ) : object( nullptr ) {} - - typename Container::iterator begin() { return object ? object->begin() : typename Container::iterator(); } - typename Container::iterator end() { return object ? object->end() : typename Container::iterator(); } - typename Container::const_iterator begin() const { return object ? object->begin() : typename Container::iterator(); } - typename Container::const_iterator end() const { return object ? 
object->end() : typename Container::iterator(); } - }; - - template - class JSONConstWrapper { - const Container *object; - - public: - JSONConstWrapper( const Container *val ) : object( val ) {} - JSONConstWrapper( std::nullptr_t ) : object( nullptr ) {} - - typename Container::const_iterator begin() const { return object ? object->begin() : typename Container::const_iterator(); } - typename Container::const_iterator end() const { return object ? object->end() : typename Container::const_iterator(); } - }; - - JSON() : Internal(), Type( Class::Null ){} - - JSON( initializer_list list ) - : JSON() - { - SetType( Class::Object ); - for( auto i = list.begin(), e = list.end(); i != e; ++i, ++i ) - operator[]( i->ToString() ) = *std::next( i ); - } - - JSON( JSON&& other ) - : Internal( other.Internal ) - , Type( other.Type ) - { other.Type = Class::Null; other.Internal.Map = nullptr; } - - JSON& operator=( JSON&& other ) { - ClearInternal(); - Internal = other.Internal; - Type = other.Type; - other.Internal.Map = nullptr; - other.Type = Class::Null; - return *this; - } - - JSON( const JSON &other ) { - switch( other.Type ) { - case Class::Object: - Internal.Map = - new map( other.Internal.Map->begin(), - other.Internal.Map->end() ); - break; - case Class::Array: - Internal.List = - new deque( other.Internal.List->begin(), - other.Internal.List->end() ); - break; - case Class::String: - Internal.String = - new string( *other.Internal.String ); - break; - default: - Internal = other.Internal; - } - Type = other.Type; - } - - JSON& operator=( const JSON &other ) { - ClearInternal(); - switch( other.Type ) { - case Class::Object: - Internal.Map = - new map( other.Internal.Map->begin(), - other.Internal.Map->end() ); - break; - case Class::Array: - Internal.List = - new deque( other.Internal.List->begin(), - other.Internal.List->end() ); - break; - case Class::String: - Internal.String = - new string( *other.Internal.String ); - break; - default: - Internal = 
other.Internal; - } - Type = other.Type; - return *this; - } - - ~JSON() { - switch( Type ) { - case Class::Array: - delete Internal.List; - break; - case Class::Object: - delete Internal.Map; - break; - case Class::String: - delete Internal.String; - break; - default:; - } - } - - template - JSON( T b, typename enable_if::value>::type* = 0 ) : Internal( b ), Type( Class::Boolean ){} - - template - JSON( T i, typename enable_if::value && !is_same::value>::type* = 0 ) : Internal( (long long)i ), Type( Class::Integral ){} - - template - JSON( T f, typename enable_if::value>::type* = 0 ) : Internal( (double)f ), Type( Class::Floating ){} - - template - JSON( T s, typename enable_if::value>::type* = 0 ) : Internal( string( s ) ), Type( Class::String ){} - - JSON( std::nullptr_t ) : Internal(), Type( Class::Null ){} - - static JSON Make( Class type ) { - JSON ret; ret.SetType( type ); - return ret; - } - - static JSON Load( const string & ); - - template - void append( T arg ) { - SetType( Class::Array ); Internal.List->emplace_back( arg ); - } - - template - void append( T arg, U... args ) { - append( arg ); append( args... 
); - } - - template - typename enable_if::value, JSON&>::type operator=( T b ) { - SetType( Class::Boolean ); Internal.Bool = b; return *this; - } - - template - typename enable_if::value && !is_same::value, JSON&>::type operator=( T i ) { - SetType( Class::Integral ); Internal.Int = i; return *this; - } - - template - typename enable_if::value, JSON&>::type operator=( T f ) { - SetType( Class::Floating ); Internal.Float = f; return *this; - } - - template - typename enable_if::value, JSON&>::type operator=( T s ) { - SetType( Class::String ); *Internal.String = string( s ); return *this; - } - - JSON& operator[]( const string &key ) { - SetType( Class::Object ); return Internal.Map->operator[]( key ); - } - - JSON& operator[]( unsigned index ) { - SetType( Class::Array ); - if( index >= Internal.List->size() ) Internal.List->resize( index + 1 ); - return Internal.List->operator[]( index ); - } - - JSON &at( const string &key ) { - return operator[]( key ); - } - - const JSON &at( const string &key ) const { - return Internal.Map->at( key ); - } - - JSON &at( unsigned index ) { - return operator[]( index ); - } - - const JSON &at( unsigned index ) const { - return Internal.List->at( index ); - } - - int length() const { - if( Type == Class::Array ) - return Internal.List->size(); - else - return -1; - } - - bool hasKey( const string &key ) const { - if( Type == Class::Object ) - return Internal.Map->find( key ) != Internal.Map->end(); - return false; - } - - int size() const { - if( Type == Class::Object ) - return Internal.Map->size(); - else if( Type == Class::Array ) - return Internal.List->size(); - else - return -1; - } - - Class JSONType() const { return Type; } - - /// Functions for getting primitives from the JSON object. - bool IsNull() const { return Type == Class::Null; } - - string ToString() const { bool b; return std::move( ToString( b ) ); } - string ToString( bool &ok ) const { - ok = (Type == Class::String); - return ok ? 
std::move( json_escape( *Internal.String ) ): string(""); - } - - double ToFloat() const { bool b; return ToFloat( b ); } - double ToFloat( bool &ok ) const { - ok = (Type == Class::Floating); - return ok ? Internal.Float : 0.0; - } - - long long ToInt() const { bool b; return ToInt( b ); } - long long ToInt( bool &ok ) const { - ok = (Type == Class::Integral); - return ok ? Internal.Int : 0; - } - - bool ToBool() const { bool b; return ToBool( b ); } - bool ToBool( bool &ok ) const { - ok = (Type == Class::Boolean); - return ok ? Internal.Bool : false; - } - - JSONWrapper> ObjectRange() { - if( Type == Class::Object ) - return JSONWrapper>( Internal.Map ); - return JSONWrapper>( nullptr ); - } - - JSONWrapper> ArrayRange() { - if( Type == Class::Array ) - return JSONWrapper>( Internal.List ); - return JSONWrapper>( nullptr ); - } - - JSONConstWrapper> ObjectRange() const { - if( Type == Class::Object ) - return JSONConstWrapper>( Internal.Map ); - return JSONConstWrapper>( nullptr ); - } - - - JSONConstWrapper> ArrayRange() const { - if( Type == Class::Array ) - return JSONConstWrapper>( Internal.List ); - return JSONConstWrapper>( nullptr ); - } - - string dump( int depth = 1, string tab = " ") const { - string pad = ""; - for( int i = 0; i < depth; ++i, pad += tab ); - - switch( Type ) { - case Class::Null: - return "null"; - case Class::Object: { - string s = "{\n"; - bool skip = true; - for( auto &p : *Internal.Map ) { - if( !skip ) s += ",\n"; - s += ( pad + "\"" + p.first + "\" : " + p.second.dump( depth + 1, tab ) ); - skip = false; - } - s += ( "\n" + pad.erase( 0, 2 ) + "}" ) ; - return s; - } - case Class::Array: { - string s = "["; - bool skip = true; - for( auto &p : *Internal.List ) { - if( !skip ) s += ", "; - s += p.dump( depth + 1, tab ); - skip = false; - } - s += "]"; - return s; - } - case Class::String: - return "\"" + json_escape( *Internal.String ) + "\""; - case Class::Floating: - return std::to_string( Internal.Float ); - case 
Class::Integral: - return std::to_string( Internal.Int ); - case Class::Boolean: - return Internal.Bool ? "true" : "false"; - default: - return ""; - } - return ""; - } - - friend std::ostream& operator<<( std::ostream&, const JSON & ); - - private: - void SetType( Class type ) { - if( type == Type ) - return; - - ClearInternal(); - - switch( type ) { - case Class::Null: Internal.Map = nullptr; break; - case Class::Object: Internal.Map = new map(); break; - case Class::Array: Internal.List = new deque(); break; - case Class::String: Internal.String = new string(); break; - case Class::Floating: Internal.Float = 0.0; break; - case Class::Integral: Internal.Int = 0; break; - case Class::Boolean: Internal.Bool = false; break; - } - - Type = type; - } - - private: - /* beware: only call if YOU know that Internal is allocated. No checks performed here. - This function should be called in a constructed JSON just before you are going to - overwrite Internal... - */ - void ClearInternal() { - switch( Type ) { - case Class::Object: delete Internal.Map; break; - case Class::Array: delete Internal.List; break; - case Class::String: delete Internal.String; break; - default:; - } - } - - private: - - Class Type = Class::Null; -}; - -JSON Array() { - return std::move( JSON::Make( JSON::Class::Array ) ); -} - -template -JSON Array( T... args ) { - JSON arr = JSON::Make( JSON::Class::Array ); - arr.append( args... 
); - return std::move( arr ); -} - -JSON Object() { - return std::move( JSON::Make( JSON::Class::Object ) ); -} - -std::ostream& operator<<( std::ostream &os, const JSON &json ) { - os << json.dump(); - return os; -} - -namespace { - JSON parse_next( const string &, size_t & ); - - void consume_ws( const string &str, size_t &offset ) { - while( isspace( str[offset] ) ) ++offset; - } - - JSON parse_object( const string &str, size_t &offset ) { - JSON Object = JSON::Make( JSON::Class::Object ); - - ++offset; - consume_ws( str, offset ); - if( str[offset] == '}' ) { - ++offset; return std::move( Object ); - } - - while( true ) { - JSON Key = parse_next( str, offset ); - consume_ws( str, offset ); - if( str[offset] != ':' ) { - std::cerr << "Error: Object: Expected colon, found '" << str[offset] << "'\n"; - break; - } - consume_ws( str, ++offset ); - JSON Value = parse_next( str, offset ); - Object[Key.ToString()] = Value; - - consume_ws( str, offset ); - if( str[offset] == ',' ) { - ++offset; continue; - } - else if( str[offset] == '}' ) { - ++offset; break; - } - else { - std::cerr << "ERROR: Object: Expected comma, found '" << str[offset] << "'\n"; - break; - } - } - - return std::move( Object ); - } - - JSON parse_array( const string &str, size_t &offset ) { - JSON Array = JSON::Make( JSON::Class::Array ); - unsigned index = 0; - - ++offset; - consume_ws( str, offset ); - if( str[offset] == ']' ) { - ++offset; return std::move( Array ); - } - - while( true ) { - Array[index++] = parse_next( str, offset ); - consume_ws( str, offset ); - - if( str[offset] == ',' ) { - ++offset; continue; - } - else if( str[offset] == ']' ) { - ++offset; break; - } - else { - std::cerr << "ERROR: Array: Expected ',' or ']', found '" << str[offset] << "'\n"; - return std::move( JSON::Make( JSON::Class::Array ) ); - } - } - - return std::move( Array ); - } - - JSON parse_string( const string &str, size_t &offset ) { - JSON String; - string val; - for( char c = str[++offset]; c != '\"' ; 
c = str[++offset] ) { - if( c == '\\' ) { - switch( str[ ++offset ] ) { - case '\"': val += '\"'; break; - case '\\': val += '\\'; break; - case '/' : val += '/' ; break; - case 'b' : val += '\b'; break; - case 'f' : val += '\f'; break; - case 'n' : val += '\n'; break; - case 'r' : val += '\r'; break; - case 't' : val += '\t'; break; - case 'u' : { - val += "\\u" ; - for( unsigned i = 1; i <= 4; ++i ) { - c = str[offset+i]; - if( (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') ) - val += c; - else { - std::cerr << "ERROR: String: Expected hex character in unicode escape, found '" << c << "'\n"; - return std::move( JSON::Make( JSON::Class::String ) ); - } - } - offset += 4; - } break; - default : val += '\\'; break; - } - } - else - val += c; - } - ++offset; - String = val; - return std::move( String ); - } - - JSON parse_number( const string &str, size_t &offset ) { - JSON Number; - string val, exp_str; - char c; - bool isDouble = false; - long exp = 0; - while( true ) { - c = str[offset++]; - if( (c == '-') || (c >= '0' && c <= '9') ) - val += c; - else if( c == '.' 
) { - val += c; - isDouble = true; - } - else - break; - } - if( c == 'E' || c == 'e' ) { - c = str[ offset++ ]; - if( c == '-' ){ ++offset; exp_str += '-';} - while( true ) { - c = str[ offset++ ]; - if( c >= '0' && c <= '9' ) - exp_str += c; - else if( !isspace( c ) && c != ',' && c != ']' && c != '}' ) { - std::cerr << "ERROR: Number: Expected a number for exponent, found '" << c << "'\n"; - return std::move( JSON::Make( JSON::Class::Null ) ); - } - else - break; - } - exp = std::stol( exp_str ); - } - else if( !isspace( c ) && c != ',' && c != ']' && c != '}' ) { - std::cerr << "ERROR: Number: unexpected character '" << c << "'\n"; - return std::move( JSON::Make( JSON::Class::Null ) ); - } - --offset; - - if( isDouble ) - Number = std::stod( val ) * std::pow( 10, exp ); - else { - if( !exp_str.empty() ) - Number = std::stoll( val ) * std::pow( 10, exp ); - else - Number = std::stoll( val ); - } - return std::move( Number ); - } - - JSON parse_bool( const string &str, size_t &offset ) { - JSON Bool; - if( str.substr( offset, 4 ) == "true" ) - Bool = true; - else if( str.substr( offset, 5 ) == "false" ) - Bool = false; - else { - std::cerr << "ERROR: Bool: Expected 'true' or 'false', found '" << str.substr( offset, 5 ) << "'\n"; - return std::move( JSON::Make( JSON::Class::Null ) ); - } - offset += (Bool.ToBool() ? 
4 : 5); - return std::move( Bool ); - } - - JSON parse_null( const string &str, size_t &offset ) { - JSON Null; - if( str.substr( offset, 4 ) != "null" ) { - std::cerr << "ERROR: Null: Expected 'null', found '" << str.substr( offset, 4 ) << "'\n"; - return std::move( JSON::Make( JSON::Class::Null ) ); - } - offset += 4; - return std::move( Null ); - } - - JSON parse_next( const string &str, size_t &offset ) { - char value; - consume_ws( str, offset ); - value = str[offset]; - switch( value ) { - case '[' : return std::move( parse_array( str, offset ) ); - case '{' : return std::move( parse_object( str, offset ) ); - case '\"': return std::move( parse_string( str, offset ) ); - case 't' : - case 'f' : return std::move( parse_bool( str, offset ) ); - case 'n' : return std::move( parse_null( str, offset ) ); - default : if( ( value <= '9' && value >= '0' ) || value == '-' ) - return std::move( parse_number( str, offset ) ); - } - std::cerr << "ERROR: Parse: Unknown starting character '" << value << "'\n"; - return JSON(); - } -} - -JSON JSON::Load( const string &str ) { - size_t offset = 0; - return std::move( parse_next( str, offset ) ); -} - -} // End Namespace json diff --git a/third-party/simdjson/README.md b/third-party/simdjson/README.md new file mode 100644 index 00000000..6c566344 --- /dev/null +++ b/third-party/simdjson/README.md @@ -0,0 +1,227 @@ + +[![Ubuntu 20.04 CI](https://github.com/simdjson/simdjson/workflows/Ubuntu%2020.04%20CI%20(GCC%209)/badge.svg)](https://simdjson.org/plots.html) +[![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/simdjson.svg)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:simdjson) +[![][license img]][license] + +[![Doxygen Documentation](https://img.shields.io/badge/docs-doxygen-green.svg)](https://simdjson.github.io/simdjson/) + +simdjson : Parsing gigabytes of JSON per second +=============================================== + + +JSON is everywhere on the Internet. 
Servers spend a *lot* of time parsing it. We need a fresh +approach. The simdjson library uses commonly available SIMD instructions and microparallel algorithms +to parse JSON 4x faster than RapidJSON and 25x faster than JSON for Modern C++. + +* **Fast:** Over 4x faster than commonly used production-grade JSON parsers. +* **Record Breaking Features:** Minify JSON at 6 GB/s, validate UTF-8 at 13 GB/s, NDJSON at 3.5 GB/s. +* **Easy:** First-class, easy to use and carefully documented APIs. +* **Strict:** Full JSON and UTF-8 validation, lossless parsing. Performance with no compromises. +* **Automatic:** Selects a CPU-tailored parser at runtime. No configuration needed. +* **Reliable:** From memory allocation to error handling, simdjson's design avoids surprises. +* **Peer Reviewed:** Our research appears in venues like VLDB Journal, Software: Practice and Experience. + +This library is part of the [Awesome Modern C++](https://awesomecpp.com) list. + +Table of Contents +----------------- + +* [Real-world usage](#real-world-usage) +* [Quick Start](#quick-start) +* [Documentation](#documentation) +* [Godbolt](#godbolt) +* [Performance results](#performance-results) +* [Bindings and Ports of simdjson](#bindings-and-ports-of-simdjson) +* [About simdjson](#about-simdjson) +* [Funding](#funding) +* [Contributing to simdjson](#contributing-to-simdjson) +* [License](#license) + + +Real-world usage +---------------- + +- [Node.js](https://nodejs.org/) +- [ClickHouse](https://github.com/ClickHouse/ClickHouse) +- [Meta Velox](https://velox-lib.io) +- [Google Pax](https://github.com/google/paxml) +- [milvus](https://github.com/milvus-io/milvus) +- [QuestDB](https://questdb.io/blog/questdb-release-8-0-3/) +- [Clang Build Analyzer](https://github.com/aras-p/ClangBuildAnalyzer) +- [Shopify HeapProfiler](https://github.com/Shopify/heap-profiler) +- [StarRocks](https://github.com/StarRocks/starrocks) +- [Microsoft FishStore](https://github.com/microsoft/FishStore) +- [Intel 
PCM](https://github.com/intel/pcm) +- [WatermelonDB](https://github.com/Nozbe/WatermelonDB) +- [Apache Doris](https://github.com/apache/doris) +- [Dgraph](https://github.com/dgraph-io/dgraph) +- [UJRPC](https://github.com/unum-cloud/ujrpc) +- [fastgltf](https://github.com/spnda/fastgltf) +- [vast](https://github.com/tenzir/vast) +- [ada-url](https://github.com/ada-url/ada) +- [fastgron](https://github.com/adamritter/fastgron) +- [WasmEdge](https://wasmedge.org) + +If you are planning to use simdjson in a product, please work from one of our releases. + +Quick Start +----------- + +The simdjson library is easily consumable with a single .h and .cpp file. + +0. Prerequisites: `g++` (version 7 or better) or `clang++` (version 6 or better), and a 64-bit + system with a command-line shell (e.g., Linux, macOS, freeBSD). We also support programming + environments like Visual Studio and Xcode, but different steps are needed. Users of clang++ may need to specify the C++ version (e.g., `c++ -std=c++17`) since clang++ tends to default on C++98. +1. Pull [simdjson.h](singleheader/simdjson.h) and [simdjson.cpp](singleheader/simdjson.cpp) into a + directory, along with the sample file [twitter.json](jsonexamples/twitter.json). You can download them with the `wget` utility: + + ``` + wget https://raw.githubusercontent.com/simdjson/simdjson/master/singleheader/simdjson.h https://raw.githubusercontent.com/simdjson/simdjson/master/singleheader/simdjson.cpp https://raw.githubusercontent.com/simdjson/simdjson/master/jsonexamples/twitter.json + ``` +2. Create `quickstart.cpp`: + +```c++ +#include +#include "simdjson.h" +using namespace simdjson; +int main(void) { + ondemand::parser parser; + padded_string json = padded_string::load("twitter.json"); + ondemand::document tweets = parser.iterate(json); + std::cout << uint64_t(tweets["search_metadata"]["count"]) << " results." << std::endl; +} +``` +3. `c++ -o quickstart quickstart.cpp simdjson.cpp` +4. 
`./quickstart` + + ``` + 100 results. + ``` + + +Documentation +------------- + +Usage documentation is available: + +* [Basics](doc/basics.md) is an overview of how to use simdjson and its APIs. +* [Performance](doc/performance.md) shows some more advanced scenarios and how to tune for them. +* [Implementation Selection](doc/implementation-selection.md) describes runtime CPU detection and + how you can work with it. +* [API](https://simdjson.github.io/simdjson/) contains the automatically generated API documentation. + +Godbolt +------------- + +Some users may want to browse code along with the compiled assembly. You may want to check out the following lists of examples: +* [simdjson examples with errors handled through exceptions](https://godbolt.org/z/7G5qE4sr9) +* [simdjson examples with errors without exceptions](https://godbolt.org/z/e9dWb9E4v) + +Performance results +------------------- + +The simdjson library uses three-quarters fewer instructions than the state-of-the-art parser [RapidJSON](https://rapidjson.org). To our knowledge, simdjson is the first fully-validating JSON parser +to run at [gigabytes per second](https://en.wikipedia.org/wiki/Gigabyte) (GB/s) on commodity processors. It can parse millions of JSON documents per second on a single core. + +The following figure represents parsing speed in GB/s for parsing various files +on an Intel Skylake processor (3.4 GHz) using the GNU GCC 10 compiler (with the -O3 flag). +We compare against the best and fastest C++ libraries on benchmarks that load and process the data. +The simdjson library offers full Unicode ([UTF-8](https://en.wikipedia.org/wiki/UTF-8)) validation and exact +number parsing. + + + +The simdjson library offers high speed whether it processes tiny files (e.g., 300 bytes) +or larger files (e.g., 3MB).
The following plot presents parsing +speed for [synthetic files over various sizes generated with a script](https://github.com/simdjson/simdjson_experiments_vldb2019/blob/master/experiments/growing/gen.py) on a 3.4 GHz Skylake processor (GNU GCC 9, -O3). + + + +[All our experiments are reproducible](https://github.com/simdjson/simdjson_experiments_vldb2019). + + +For NDJSON files, we can exceed 3 GB/s with [our multithreaded parsing functions](https://github.com/simdjson/simdjson/blob/master/doc/parse_many.md). + + + + +Bindings and Ports of simdjson +------------------------------ + +We distinguish between "bindings" (which just wrap the C++ code) and a port to another programming language (which reimplements everything). + +- [ZippyJSON](https://github.com/michaeleisel/zippyjson): Swift bindings for the simdjson project. +- [libpy_simdjson](https://github.com/gerrymanoim/libpy_simdjson/): high-speed Python bindings for simdjson using [libpy](https://github.com/quantopian/libpy). +- [pysimdjson](https://github.com/TkTech/pysimdjson): Python bindings for the simdjson project. +- [cysimdjson](https://github.com/TeskaLabs/cysimdjson): high-speed Python bindings for the simdjson project. +- [simdjson-rs](https://github.com/simd-lite): Rust port. +- [simdjson-rust](https://github.com/SunDoge/simdjson-rust): Rust wrapper (bindings). +- [SimdJsonSharp](https://github.com/EgorBo/SimdJsonSharp): C# version for .NET Core (bindings and full port). +- [simdjson_nodejs](https://github.com/luizperes/simdjson_nodejs): Node.js bindings for the simdjson project. +- [simdjson_php](https://github.com/crazyxman/simdjson_php): PHP bindings for the simdjson project. +- [simdjson_ruby](https://github.com/saka1/simdjson_ruby): Ruby bindings for the simdjson project. +- [fast_jsonparser](https://github.com/anilmaurya/fast_jsonparser): Ruby bindings for the simdjson project. +- [simdjson-go](https://github.com/minio/simdjson-go): Go port using Golang assembly. 
+- [rcppsimdjson](https://github.com/eddelbuettel/rcppsimdjson): R bindings. +- [simdjson_erlang](https://github.com/ChomperT/simdjson_erlang): Erlang bindings. +- [simdjsone](https://github.com/saleyn/simdjsone): Erlang bindings. +- [lua-simdjson](https://github.com/FourierTransformer/lua-simdjson): Lua bindings. +- [hermes-json](https://hackage.haskell.org/package/hermes-json): Haskell bindings. +- [simdjzon](https://github.com/travisstaloch/simdjzon): Zig port. +- [JSON-Simd](https://github.com/rawleyfowler/JSON-simd): Raku bindings. +- [JSON::SIMD](https://metacpan.org/pod/JSON::SIMD): Perl bindings; fully-featured JSON module that uses simdjson for decoding. +- [gemmaJSON](https://github.com/sainttttt/gemmaJSON): Nim JSON parser based on simdjson bindings. +- [simdjson-java](https://github.com/simdjson/simdjson-java): Java port. + +About simdjson +-------------- + +The simdjson library takes advantage of modern microarchitectures, parallelizing with SIMD vector +instructions, reducing branch misprediction, and reducing data dependency to take advantage of each +CPU's multiple execution cores. + +Our default front-end is called On Demand, and we wrote a paper about it: + +- John Keiser, Daniel Lemire, [On-Demand JSON: A Better Way to Parse Documents?](http://arxiv.org/abs/2312.17149), Software: Practice and Experience 54 (6), 2024. + +Some people [enjoy reading the first (2019) simdjson paper](https://arxiv.org/abs/1902.08318). A description of the design +and implementation of simdjson is in our research article: +- Geoff Langdale, Daniel Lemire, [Parsing Gigabytes of JSON per Second](https://arxiv.org/abs/1902.08318), VLDB Journal 28 (6), 2019. + +We have an in-depth paper focused on the UTF-8 validation: + +- John Keiser, Daniel Lemire, [Validating UTF-8 In Less Than One Instruction Per Byte](https://arxiv.org/abs/2010.03090), Software: Practice & Experience 51 (5), 2021.
+ +We also have an informal [blog post providing some background and context](https://branchfree.org/2019/02/25/paper-parsing-gigabytes-of-json-per-second/). + +For the video inclined,
+[![simdjson at QCon San Francisco 2019](http://img.youtube.com/vi/wlvKAT7SZIQ/0.jpg)](http://www.youtube.com/watch?v=wlvKAT7SZIQ)
+(It was the best voted talk, we're kinda proud of it.) + +Funding +------- + +The work is supported by the Natural Sciences and Engineering Research Council of Canada under grant +number RGPIN-2017-03910. + +[license]: LICENSE +[license img]: https://img.shields.io/badge/License-Apache%202-blue.svg + +Contributing to simdjson +------------------------ + +Head over to [CONTRIBUTING.md](CONTRIBUTING.md) for information on contributing to simdjson, and +[HACKING.md](HACKING.md) for information on source, building, and architecture/design. + +License +------- + +This code is made available under the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0.html). + +Under Windows, we build some tools using the windows/dirent_portable.h file (which is outside our library code): it is under the liberal (business-friendly) MIT license. + +For compilers that do not support [C++17](https://en.wikipedia.org/wiki/C%2B%2B17), we bundle the string-view library which is published under the [Boost license](http://www.boost.org/LICENSE_1_0.txt). Like the Apache license, the Boost license is a permissive license allowing commercial redistribution. + +For efficient number serialization, we bundle Florian Loitsch's implementation of the Grisu2 algorithm for binary to decimal floating-point numbers. The implementation was slightly modified by JSON for Modern C++ library. Both Florian Loitsch's implementation and JSON for Modern C++ are provided under the MIT license. + +For runtime dispatching, we use some code from the PyTorch project licensed under 3-clause BSD. diff --git a/third-party/simdjson/simdjson.cpp b/third-party/simdjson/simdjson.cpp new file mode 100644 index 00000000..60a50ca5 --- /dev/null +++ b/third-party/simdjson/simdjson.cpp @@ -0,0 +1,55928 @@ +/* auto-generated on 2024-06-11 14:08:20 -0400. Do not edit! 
*/ +/* including simdjson.cpp: */ +/* begin file simdjson.cpp */ +#define SIMDJSON_SRC_SIMDJSON_CPP + +/* including base.h: #include */ +/* begin file base.h */ +#ifndef SIMDJSON_SRC_BASE_H +#define SIMDJSON_SRC_BASE_H + +/* including simdjson/base.h: #include */ +/* begin file simdjson/base.h */ +/** + * @file Base declarations for all simdjson headers + * @private + */ +#ifndef SIMDJSON_BASE_H +#define SIMDJSON_BASE_H + +/* including simdjson/common_defs.h: #include "simdjson/common_defs.h" */ +/* begin file simdjson/common_defs.h */ +#ifndef SIMDJSON_COMMON_DEFS_H +#define SIMDJSON_COMMON_DEFS_H + +#include +/* including simdjson/compiler_check.h: #include "simdjson/compiler_check.h" */ +/* begin file simdjson/compiler_check.h */ +#ifndef SIMDJSON_COMPILER_CHECK_H +#define SIMDJSON_COMPILER_CHECK_H + +#ifndef __cplusplus +#error simdjson requires a C++ compiler +#endif + +#ifndef SIMDJSON_CPLUSPLUS +#if defined(_MSVC_LANG) && !defined(__clang__) +#define SIMDJSON_CPLUSPLUS (_MSC_VER == 1900 ? 
201103L : _MSVC_LANG) +#else +#define SIMDJSON_CPLUSPLUS __cplusplus +#endif +#endif + +// C++ 17 +#if !defined(SIMDJSON_CPLUSPLUS17) && (SIMDJSON_CPLUSPLUS >= 201703L) +#define SIMDJSON_CPLUSPLUS17 1 +#endif + +// C++ 14 +#if !defined(SIMDJSON_CPLUSPLUS14) && (SIMDJSON_CPLUSPLUS >= 201402L) +#define SIMDJSON_CPLUSPLUS14 1 +#endif + +// C++ 11 +#if !defined(SIMDJSON_CPLUSPLUS11) && (SIMDJSON_CPLUSPLUS >= 201103L) +#define SIMDJSON_CPLUSPLUS11 1 +#endif + +#ifndef SIMDJSON_CPLUSPLUS11 +#error simdjson requires a compiler compliant with the C++11 standard +#endif + +#ifndef SIMDJSON_IF_CONSTEXPR +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_IF_CONSTEXPR if constexpr +#else +#define SIMDJSON_IF_CONSTEXPR if +#endif +#endif + +#endif // SIMDJSON_COMPILER_CHECK_H +/* end file simdjson/compiler_check.h */ +/* including simdjson/portability.h: #include "simdjson/portability.h" */ +/* begin file simdjson/portability.h */ +#ifndef SIMDJSON_PORTABILITY_H +#define SIMDJSON_PORTABILITY_H + +#include +#include +#include +#include +#include +#ifndef _WIN32 +// strcasecmp, strncasecmp +#include +#endif + +// We are using size_t without namespace std:: throughout the project +using std::size_t; + +#ifdef _MSC_VER +#define SIMDJSON_VISUAL_STUDIO 1 +/** + * We want to differentiate carefully between + * clang under visual studio and regular visual + * studio. + * + * Under clang for Windows, we enable: + * * target pragmas so that part and only part of the + * code gets compiled for advanced instructions. 
+ * + */ +#ifdef __clang__ +// clang under visual studio +#define SIMDJSON_CLANG_VISUAL_STUDIO 1 +#else +// just regular visual studio (best guess) +#define SIMDJSON_REGULAR_VISUAL_STUDIO 1 +#endif // __clang__ +#endif // _MSC_VER + +#if (defined(__x86_64__) || defined(_M_AMD64)) && !defined(_M_ARM64EC) +#define SIMDJSON_IS_X86_64 1 +#elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) +#define SIMDJSON_IS_ARM64 1 +#elif defined(__riscv) && __riscv_xlen == 64 +#define SIMDJSON_IS_RISCV64 1 +#elif defined(__loongarch_lp64) +#define SIMDJSON_IS_LOONGARCH64 1 +#elif defined(__PPC64__) || defined(_M_PPC64) +#if defined(__ALTIVEC__) +#define SIMDJSON_IS_PPC64_VMX 1 +#endif // defined(__ALTIVEC__) +#else +#define SIMDJSON_IS_32BITS 1 + +#if defined(_M_IX86) || defined(__i386__) +#define SIMDJSON_IS_X86_32BITS 1 +#elif defined(__arm__) || defined(_M_ARM) +#define SIMDJSON_IS_ARM_32BITS 1 +#elif defined(__PPC__) || defined(_M_PPC) +#define SIMDJSON_IS_PPC_32BITS 1 +#endif + +#endif // defined(__x86_64__) || defined(_M_AMD64) +#ifndef SIMDJSON_IS_32BITS +#define SIMDJSON_IS_32BITS 0 +#endif + +#if SIMDJSON_IS_32BITS +#ifndef SIMDJSON_NO_PORTABILITY_WARNING +// In the future, we should allow programmers +// to get warning. +#endif // SIMDJSON_NO_PORTABILITY_WARNING +#endif // SIMDJSON_IS_32BITS + +#define SIMDJSON_CAT_IMPLEMENTATION_(a,...) a ## __VA_ARGS__ +#define SIMDJSON_CAT(a,...) SIMDJSON_CAT_IMPLEMENTATION_(a, __VA_ARGS__) + +#define SIMDJSON_STRINGIFY_IMPLEMENTATION_(a,...) #a SIMDJSON_STRINGIFY(__VA_ARGS__) +#define SIMDJSON_STRINGIFY(a,...) SIMDJSON_CAT_IMPLEMENTATION_(a, __VA_ARGS__) + +// this is almost standard? +#undef SIMDJSON_STRINGIFY_IMPLEMENTATION_ +#undef SIMDJSON_STRINGIFY +#define SIMDJSON_STRINGIFY_IMPLEMENTATION_(a) #a +#define SIMDJSON_STRINGIFY(a) SIMDJSON_STRINGIFY_IMPLEMENTATION_(a) + +// Our fast kernels require 64-bit systems. +// +// On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions. 
+// Furthermore, the number of SIMD registers is reduced. +// +// On 32-bit ARM, we would have smaller registers. +// +// The simdjson users should still have the fallback kernel. It is +// slower, but it should run everywhere. + +// +// Enable valid runtime implementations, and select SIMDJSON_BUILTIN_IMPLEMENTATION +// + +// We are going to use runtime dispatch. +#if SIMDJSON_IS_X86_64 +#ifdef __clang__ +// clang does not have GCC push pop +// warning: clang attribute push can't be used within a namespace in clang up +// til 8.0 so SIMDJSON_TARGET_REGION and SIMDJSON_UNTARGET_REGION must be *outside* of a +// namespace. +#define SIMDJSON_TARGET_REGION(T) \ + _Pragma(SIMDJSON_STRINGIFY( \ + clang attribute push(__attribute__((target(T))), apply_to = function))) +#define SIMDJSON_UNTARGET_REGION _Pragma("clang attribute pop") +#elif defined(__GNUC__) +// GCC is easier +#define SIMDJSON_TARGET_REGION(T) \ + _Pragma("GCC push_options") _Pragma(SIMDJSON_STRINGIFY(GCC target(T))) +#define SIMDJSON_UNTARGET_REGION _Pragma("GCC pop_options") +#endif // clang then gcc + +#endif // x86 + +// Default target region macros don't do anything. +#ifndef SIMDJSON_TARGET_REGION +#define SIMDJSON_TARGET_REGION(T) +#define SIMDJSON_UNTARGET_REGION +#endif + +// Is threading enabled? +#if defined(_REENTRANT) || defined(_MT) +#ifndef SIMDJSON_THREADS_ENABLED +#define SIMDJSON_THREADS_ENABLED +#endif +#endif + +// workaround for large stack sizes under -O0. +// https://github.com/simdjson/simdjson/issues/691 +#ifdef __APPLE__ +#ifndef __OPTIMIZE__ +// Apple systems have small stack sizes in secondary threads. +// Lack of compiler optimization may generate high stack usage. +// Users may want to disable threads for safety, but only when +// in debug mode which we detect by the fact that the __OPTIMIZE__ +// macro is not defined. 
+#undef SIMDJSON_THREADS_ENABLED +#endif +#endif + + +#if defined(__clang__) +#define SIMDJSON_NO_SANITIZE_UNDEFINED __attribute__((no_sanitize("undefined"))) +#elif defined(__GNUC__) +#define SIMDJSON_NO_SANITIZE_UNDEFINED __attribute__((no_sanitize_undefined)) +#else +#define SIMDJSON_NO_SANITIZE_UNDEFINED +#endif + + +#if defined(__clang__) || defined(__GNUC__) +#if defined(__has_feature) +# if __has_feature(memory_sanitizer) +#define SIMDJSON_NO_SANITIZE_MEMORY __attribute__((no_sanitize("memory"))) +# endif // if __has_feature(memory_sanitizer) +#endif // defined(__has_feature) +#endif +// make sure it is defined as 'nothing' if it is unapplicable. +#ifndef SIMDJSON_NO_SANITIZE_MEMORY +#define SIMDJSON_NO_SANITIZE_MEMORY +#endif + +#if SIMDJSON_VISUAL_STUDIO +// This is one case where we do not distinguish between +// regular visual studio and clang under visual studio. +// clang under Windows has _stricmp (like visual studio) but not strcasecmp (as clang normally has) +#define simdjson_strcasecmp _stricmp +#define simdjson_strncasecmp _strnicmp +#else +// The strcasecmp, strncasecmp, and strcasestr functions do not work with multibyte strings (e.g. UTF-8). +// So they are only useful for ASCII in our context. +// https://www.gnu.org/software/libunistring/manual/libunistring.html#char-_002a-strings +#define simdjson_strcasecmp strcasecmp +#define simdjson_strncasecmp strncasecmp +#endif + +#if defined(NDEBUG) || defined(__OPTIMIZE__) || (defined(_MSC_VER) && !defined(_DEBUG)) +// If NDEBUG is set, or __OPTIMIZE__ is set, or we are under MSVC in release mode, +// then do away with asserts and use __assume. 
+#if SIMDJSON_VISUAL_STUDIO +#define SIMDJSON_UNREACHABLE() __assume(0) +#define SIMDJSON_ASSUME(COND) __assume(COND) +#else +#define SIMDJSON_UNREACHABLE() __builtin_unreachable(); +#define SIMDJSON_ASSUME(COND) do { if (!(COND)) __builtin_unreachable(); } while (0) +#endif + +#else // defined(NDEBUG) || defined(__OPTIMIZE__) || (defined(_MSC_VER) && !defined(_DEBUG)) +// This should only ever be enabled in debug mode. +#define SIMDJSON_UNREACHABLE() assert(0); +#define SIMDJSON_ASSUME(COND) assert(COND) + +#endif + +#endif // SIMDJSON_PORTABILITY_H +/* end file simdjson/portability.h */ + +namespace simdjson { +namespace internal { +/** + * @private + * Our own implementation of the C++17 to_chars function. + * Defined in src/to_chars + */ +char *to_chars(char *first, const char *last, double value); +/** + * @private + * A number parsing routine. + * Defined in src/from_chars + */ +double from_chars(const char *first) noexcept; +double from_chars(const char *first, const char* end) noexcept; +} + +#ifndef SIMDJSON_EXCEPTIONS +#if __cpp_exceptions +#define SIMDJSON_EXCEPTIONS 1 +#else +#define SIMDJSON_EXCEPTIONS 0 +#endif +#endif + +} // namespace simdjson + +#if defined(__GNUC__) + // Marks a block with a name so that MCA analysis can see it. 
+ #define SIMDJSON_BEGIN_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-BEGIN " #name); + #define SIMDJSON_END_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-END " #name); + #define SIMDJSON_DEBUG_BLOCK(name, block) BEGIN_DEBUG_BLOCK(name); block; END_DEBUG_BLOCK(name); +#else + #define SIMDJSON_BEGIN_DEBUG_BLOCK(name) + #define SIMDJSON_END_DEBUG_BLOCK(name) + #define SIMDJSON_DEBUG_BLOCK(name, block) +#endif + +// Align to N-byte boundary +#define SIMDJSON_ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1)) +#define SIMDJSON_ROUNDDOWN_N(a, n) ((a) & ~((n)-1)) + +#define SIMDJSON_ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0) + +#if SIMDJSON_REGULAR_VISUAL_STUDIO + + #define simdjson_really_inline __forceinline + #define simdjson_never_inline __declspec(noinline) + + #define simdjson_unused + #define simdjson_warn_unused + + #ifndef simdjson_likely + #define simdjson_likely(x) x + #endif + #ifndef simdjson_unlikely + #define simdjson_unlikely(x) x + #endif + + #define SIMDJSON_PUSH_DISABLE_WARNINGS __pragma(warning( push )) + #define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS __pragma(warning( push, 0 )) + #define SIMDJSON_DISABLE_VS_WARNING(WARNING_NUMBER) __pragma(warning( disable : WARNING_NUMBER )) + // Get rid of Intellisense-only warnings (Code Analysis) + // Though __has_include is C++17, it is supported in Visual Studio 2017 or better (_MSC_VER>=1910). 
+ #ifdef __has_include + #if __has_include(<CppCoreCheck\Warnings.h>) + #include <CppCoreCheck\Warnings.h> + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS SIMDJSON_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS) + #endif + #endif + + #ifndef SIMDJSON_DISABLE_UNDESIRED_WARNINGS + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS + #endif + + #define SIMDJSON_DISABLE_DEPRECATED_WARNING SIMDJSON_DISABLE_VS_WARNING(4996) + #define SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING + #define SIMDJSON_POP_DISABLE_WARNINGS __pragma(warning( pop )) + + #define SIMDJSON_PUSH_DISABLE_UNUSED_WARNINGS + #define SIMDJSON_POP_DISABLE_UNUSED_WARNINGS + +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + + #define simdjson_really_inline inline __attribute__((always_inline)) + #define simdjson_never_inline inline __attribute__((noinline)) + + #define simdjson_unused __attribute__((unused)) + #define simdjson_warn_unused __attribute__((warn_unused_result)) + + #ifndef simdjson_likely + #define simdjson_likely(x) __builtin_expect(!!(x), 1) + #endif + #ifndef simdjson_unlikely + #define simdjson_unlikely(x) __builtin_expect(!!(x), 0) + #endif + + #define SIMDJSON_PUSH_DISABLE_WARNINGS _Pragma("GCC diagnostic push") + // gcc doesn't seem to disable all warnings with all and extra, add warnings here as necessary + // We do it separately for clang since it has different warnings. + #ifdef __clang__ + // clang is missing -Wmaybe-uninitialized.
+ #define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS SIMDJSON_PUSH_DISABLE_WARNINGS \ + SIMDJSON_DISABLE_GCC_WARNING(-Weffc++) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wall) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wconversion) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wextra) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wattributes) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wreturn-type) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wshadow) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-parameter) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-variable) + #else // __clang__ + #define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS SIMDJSON_PUSH_DISABLE_WARNINGS \ + SIMDJSON_DISABLE_GCC_WARNING(-Weffc++) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wall) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wconversion) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wextra) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wattributes) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wreturn-type) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wshadow) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-parameter) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-variable) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wformat-security) + #endif // __clang__ + + #define SIMDJSON_PRAGMA(P) _Pragma(#P) + #define SIMDJSON_DISABLE_GCC_WARNING(WARNING) SIMDJSON_PRAGMA(GCC diagnostic ignored #WARNING) + #if SIMDJSON_CLANG_VISUAL_STUDIO + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS SIMDJSON_DISABLE_GCC_WARNING(-Wmicrosoft-include) + #else + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS + #endif + #define SIMDJSON_DISABLE_DEPRECATED_WARNING SIMDJSON_DISABLE_GCC_WARNING(-Wdeprecated-declarations) + #define SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING SIMDJSON_DISABLE_GCC_WARNING(-Wstrict-overflow) + #define SIMDJSON_POP_DISABLE_WARNINGS _Pragma("GCC diagnostic pop") + + #define 
SIMDJSON_PUSH_DISABLE_UNUSED_WARNINGS SIMDJSON_PUSH_DISABLE_WARNINGS \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused) + #define SIMDJSON_POP_DISABLE_UNUSED_WARNINGS SIMDJSON_POP_DISABLE_WARNINGS + + + +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO + +#if defined(simdjson_inline) + // Prefer the user's definition of simdjson_inline; don't define it ourselves. +#elif defined(__GNUC__) && !defined(__OPTIMIZE__) + // If optimizations are disabled, forcing inlining can lead to significant + // code bloat and high compile times. Don't use simdjson_really_inline for + // unoptimized builds. + #define simdjson_inline inline +#else + // Force inlining for most simdjson functions. + #define simdjson_inline simdjson_really_inline +#endif + +#if SIMDJSON_VISUAL_STUDIO + /** + * Windows users need to do some extra work when building + * or using a dynamic library (DLL). When building, we need + * to set SIMDJSON_DLLIMPORTEXPORT to __declspec(dllexport). + * When *using* the DLL, the user needs to set + * SIMDJSON_DLLIMPORTEXPORT to __declspec(dllimport). + * + * Static libraries do not require such work. + * + * It does not matter here whether you are using + * the regular visual studio or clang under visual + * studio, you still need to handle these issues. + * + * Non-Windows systems do not have this complexity. + */ + #if SIMDJSON_BUILDING_WINDOWS_DYNAMIC_LIBRARY + // We set SIMDJSON_BUILDING_WINDOWS_DYNAMIC_LIBRARY when we build a DLL under Windows. + // It should never happen that both SIMDJSON_BUILDING_WINDOWS_DYNAMIC_LIBRARY and + // SIMDJSON_USING_WINDOWS_DYNAMIC_LIBRARY are set. + #define SIMDJSON_DLLIMPORTEXPORT __declspec(dllexport) + #elif SIMDJSON_USING_WINDOWS_DYNAMIC_LIBRARY + // Windows users who call a dynamic library should set SIMDJSON_USING_WINDOWS_DYNAMIC_LIBRARY to 1. + #define SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport) + #else + // We assume by default static linkage + #define SIMDJSON_DLLIMPORTEXPORT + #endif + +/** + * Workaround for the vcpkg package manager.
Only vcpkg should + * ever touch the next line. The SIMDJSON_USING_LIBRARY macro is otherwise unused. + */ +#if SIMDJSON_USING_LIBRARY +#define SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport) +#endif +/** + * End of workaround for the vcpkg package manager. + */ +#else + #define SIMDJSON_DLLIMPORTEXPORT +#endif + +// C++17 requires string_view. +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_HAS_STRING_VIEW +#include // by the standard, this has to be safe. +#endif + +// This macro (__cpp_lib_string_view) has to be defined +// for C++17 and better, but if it is otherwise defined, +// we are going to assume that string_view is available +// even if we do not have C++17 support. +#ifdef __cpp_lib_string_view +#define SIMDJSON_HAS_STRING_VIEW +#endif + +// Some systems have string_view even if we do not have C++17 support, +// and even if __cpp_lib_string_view is undefined, it is the case +// with Apple clang version 11. +// We must handle it. *This is important.* +#ifndef SIMDJSON_HAS_STRING_VIEW +#if defined __has_include +// do not combine the next #if with the previous one (unsafe) +#if __has_include () +// now it is safe to trigger the include +#include // though the file is there, it does not follow that we got the implementation +#if defined(_LIBCPP_STRING_VIEW) +// Ah! So we under libc++ which under its Library Fundamentals Technical Specification, which preceded C++17, +// included string_view. +// This means that we have string_view *even though* we may not have C++17. +#define SIMDJSON_HAS_STRING_VIEW +#endif // _LIBCPP_STRING_VIEW +#endif // __has_include () +#endif // defined __has_include +#endif // def SIMDJSON_HAS_STRING_VIEW +// end of complicated but important routine to try to detect string_view. + +// +// Backfill std::string_view using nonstd::string_view on systems where +// we expect that string_view is missing. Important: if we get this wrong, +// we will end up with two string_view definitions and potential trouble. 
+// That is why we work so hard above to avoid it. +// +#ifndef SIMDJSON_HAS_STRING_VIEW +SIMDJSON_PUSH_DISABLE_ALL_WARNINGS +/* including simdjson/nonstd/string_view.hpp: #include "simdjson/nonstd/string_view.hpp" */ +/* begin file simdjson/nonstd/string_view.hpp */ +// Copyright 2017-2020 by Martin Moene +// +// string-view lite, a C++17-like string_view for C++98 and later. +// For more information see https://github.com/martinmoene/string-view-lite +// +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#pragma once + +#ifndef NONSTD_SV_LITE_H_INCLUDED +#define NONSTD_SV_LITE_H_INCLUDED + +#define string_view_lite_MAJOR 1 +#define string_view_lite_MINOR 8 +#define string_view_lite_PATCH 0 + +#define string_view_lite_VERSION nssv_STRINGIFY(string_view_lite_MAJOR) "." nssv_STRINGIFY(string_view_lite_MINOR) "." nssv_STRINGIFY(string_view_lite_PATCH) + +#define nssv_STRINGIFY( x ) nssv_STRINGIFY_( x ) +#define nssv_STRINGIFY_( x ) #x + +// string-view lite configuration: + +#define nssv_STRING_VIEW_DEFAULT 0 +#define nssv_STRING_VIEW_NONSTD 1 +#define nssv_STRING_VIEW_STD 2 + +// tweak header support: + +#ifdef __has_include +# if __has_include(<nonstd/string_view.tweak.hpp>) +# include <nonstd/string_view.tweak.hpp> +# endif +#define nssv_HAVE_TWEAK_HEADER 1 +#else +#define nssv_HAVE_TWEAK_HEADER 0 +//# pragma message("string_view.hpp: Note: Tweak header not supported.") +#endif + +// string_view selection and configuration: + +#if !defined( nssv_CONFIG_SELECT_STRING_VIEW ) +# define nssv_CONFIG_SELECT_STRING_VIEW ( nssv_HAVE_STD_STRING_VIEW ?
nssv_STRING_VIEW_STD : nssv_STRING_VIEW_NONSTD ) +#endif + +#ifndef nssv_CONFIG_STD_SV_OPERATOR +# define nssv_CONFIG_STD_SV_OPERATOR 0 +#endif + +#ifndef nssv_CONFIG_USR_SV_OPERATOR +# define nssv_CONFIG_USR_SV_OPERATOR 1 +#endif + +#ifdef nssv_CONFIG_CONVERSION_STD_STRING +# define nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS nssv_CONFIG_CONVERSION_STD_STRING +# define nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS nssv_CONFIG_CONVERSION_STD_STRING +#endif + +#ifndef nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS +# define nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS 1 +#endif + +#ifndef nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS +# define nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS 1 +#endif + +#ifndef nssv_CONFIG_NO_STREAM_INSERTION +# define nssv_CONFIG_NO_STREAM_INSERTION 0 +#endif + +#ifndef nssv_CONFIG_CONSTEXPR11_STD_SEARCH +# define nssv_CONFIG_CONSTEXPR11_STD_SEARCH 1 +#endif + +// Control presence of exception handling (try and auto discover): + +#ifndef nssv_CONFIG_NO_EXCEPTIONS +# if defined(_MSC_VER) +# include <cstddef> // for _HAS_EXCEPTIONS +# endif +# if defined(__cpp_exceptions) || defined(__EXCEPTIONS) || (_HAS_EXCEPTIONS) +# define nssv_CONFIG_NO_EXCEPTIONS 0 +# else +# define nssv_CONFIG_NO_EXCEPTIONS 1 +# endif +#endif + +// C++ language version detection (C++23 is speculative): +// Note: VC14.0/1900 (VS2015) lacks too much from C++14. + +#ifndef nssv_CPLUSPLUS +# if defined(_MSVC_LANG ) && !defined(__clang__) +# define nssv_CPLUSPLUS (_MSC_VER == 1900 ?
201103L : _MSVC_LANG ) +# else +# define nssv_CPLUSPLUS __cplusplus +# endif +#endif + +#define nssv_CPP98_OR_GREATER ( nssv_CPLUSPLUS >= 199711L ) +#define nssv_CPP11_OR_GREATER ( nssv_CPLUSPLUS >= 201103L ) +#define nssv_CPP11_OR_GREATER_ ( nssv_CPLUSPLUS >= 201103L ) +#define nssv_CPP14_OR_GREATER ( nssv_CPLUSPLUS >= 201402L ) +#define nssv_CPP17_OR_GREATER ( nssv_CPLUSPLUS >= 201703L ) +#define nssv_CPP20_OR_GREATER ( nssv_CPLUSPLUS >= 202002L ) +#define nssv_CPP23_OR_GREATER ( nssv_CPLUSPLUS >= 202300L ) + +// use C++17 std::string_view if available and requested: + +#if nssv_CPP17_OR_GREATER && defined(__has_include ) +# if __has_include( <string_view> ) +# define nssv_HAVE_STD_STRING_VIEW 1 +# else +# define nssv_HAVE_STD_STRING_VIEW 0 +# endif +#else +# define nssv_HAVE_STD_STRING_VIEW 0 +#endif + +#define nssv_USES_STD_STRING_VIEW ( (nssv_CONFIG_SELECT_STRING_VIEW == nssv_STRING_VIEW_STD) || ((nssv_CONFIG_SELECT_STRING_VIEW == nssv_STRING_VIEW_DEFAULT) && nssv_HAVE_STD_STRING_VIEW) ) + +#define nssv_HAVE_STARTS_WITH ( nssv_CPP20_OR_GREATER || !nssv_USES_STD_STRING_VIEW ) +#define nssv_HAVE_ENDS_WITH nssv_HAVE_STARTS_WITH + +// +// Use C++17 std::string_view: +// + +#if nssv_USES_STD_STRING_VIEW + +#include <string_view> + +// Extensions for std::string: + +#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS + +#include <string> + +namespace nonstd { + +template< class CharT, class Traits, class Allocator = std::allocator<CharT> > +std::basic_string<CharT, Traits, Allocator> +to_string( std::basic_string_view<CharT, Traits> v, Allocator const & a = Allocator() ) +{ + return std::basic_string<CharT,Traits, Allocator>( v.begin(), v.end(), a ); +} + +template< class CharT, class Traits, class Allocator > +std::basic_string_view<CharT, Traits> +to_string_view( std::basic_string<CharT, Traits, Allocator> const & s ) +{ + return std::basic_string_view<CharT, Traits>( s.data(), s.size() ); +} + +// Literal operators sv and _sv: + +#if nssv_CONFIG_STD_SV_OPERATOR + +using namespace std::literals::string_view_literals; + +#endif + +#if nssv_CONFIG_USR_SV_OPERATOR + +inline namespace literals { +inline namespace
string_view_literals { + + +constexpr std::string_view operator "" _sv( const char* str, size_t len ) noexcept // (1) +{ + return std::string_view{ str, len }; +} + +constexpr std::u16string_view operator "" _sv( const char16_t* str, size_t len ) noexcept // (2) +{ + return std::u16string_view{ str, len }; +} + +constexpr std::u32string_view operator "" _sv( const char32_t* str, size_t len ) noexcept // (3) +{ + return std::u32string_view{ str, len }; +} + +constexpr std::wstring_view operator "" _sv( const wchar_t* str, size_t len ) noexcept // (4) +{ + return std::wstring_view{ str, len }; +} + +}} // namespace literals::string_view_literals + +#endif // nssv_CONFIG_USR_SV_OPERATOR + +} // namespace nonstd + +#endif // nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS + +namespace nonstd { + +using std::string_view; +using std::wstring_view; +using std::u16string_view; +using std::u32string_view; +using std::basic_string_view; + +// literal "sv" and "_sv", see above + +using std::operator==; +using std::operator!=; +using std::operator<; +using std::operator<=; +using std::operator>; +using std::operator>=; + +using std::operator<<; + +} // namespace nonstd + +#else // nssv_HAVE_STD_STRING_VIEW + +// +// Before C++17: use string_view lite: +// + +// Compiler versions: +// +// MSVC++ 6.0 _MSC_VER == 1200 nssv_COMPILER_MSVC_VERSION == 60 (Visual Studio 6.0) +// MSVC++ 7.0 _MSC_VER == 1300 nssv_COMPILER_MSVC_VERSION == 70 (Visual Studio .NET 2002) +// MSVC++ 7.1 _MSC_VER == 1310 nssv_COMPILER_MSVC_VERSION == 71 (Visual Studio .NET 2003) +// MSVC++ 8.0 _MSC_VER == 1400 nssv_COMPILER_MSVC_VERSION == 80 (Visual Studio 2005) +// MSVC++ 9.0 _MSC_VER == 1500 nssv_COMPILER_MSVC_VERSION == 90 (Visual Studio 2008) +// MSVC++ 10.0 _MSC_VER == 1600 nssv_COMPILER_MSVC_VERSION == 100 (Visual Studio 2010) +// MSVC++ 11.0 _MSC_VER == 1700 nssv_COMPILER_MSVC_VERSION == 110 (Visual Studio 2012) +// MSVC++ 12.0 _MSC_VER == 1800 nssv_COMPILER_MSVC_VERSION == 120 (Visual Studio 2013) 
+// MSVC++ 14.0 _MSC_VER == 1900 nssv_COMPILER_MSVC_VERSION == 140 (Visual Studio 2015) +// MSVC++ 14.1 _MSC_VER >= 1910 nssv_COMPILER_MSVC_VERSION == 141 (Visual Studio 2017) +// MSVC++ 14.2 _MSC_VER >= 1920 nssv_COMPILER_MSVC_VERSION == 142 (Visual Studio 2019) + +#if defined(_MSC_VER ) && !defined(__clang__) +# define nssv_COMPILER_MSVC_VER (_MSC_VER ) +# define nssv_COMPILER_MSVC_VERSION (_MSC_VER / 10 - 10 * ( 5 + (_MSC_VER < 1900 ) ) ) +#else +# define nssv_COMPILER_MSVC_VER 0 +# define nssv_COMPILER_MSVC_VERSION 0 +#endif + +#define nssv_COMPILER_VERSION( major, minor, patch ) ( 10 * ( 10 * (major) + (minor) ) + (patch) ) + +#if defined( __apple_build_version__ ) +# define nssv_COMPILER_APPLECLANG_VERSION nssv_COMPILER_VERSION(__clang_major__, __clang_minor__, __clang_patchlevel__) +# define nssv_COMPILER_CLANG_VERSION 0 +#elif defined( __clang__ ) +# define nssv_COMPILER_APPLECLANG_VERSION 0 +# define nssv_COMPILER_CLANG_VERSION nssv_COMPILER_VERSION(__clang_major__, __clang_minor__, __clang_patchlevel__) +#else +# define nssv_COMPILER_APPLECLANG_VERSION 0 +# define nssv_COMPILER_CLANG_VERSION 0 +#endif + +#if defined(__GNUC__) && !defined(__clang__) +# define nssv_COMPILER_GNUC_VERSION nssv_COMPILER_VERSION(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#else +# define nssv_COMPILER_GNUC_VERSION 0 +#endif + +// half-open range [lo..hi): +#define nssv_BETWEEN( v, lo, hi ) ( (lo) <= (v) && (v) < (hi) ) + +// Presence of language and library features: + +#ifdef _HAS_CPP0X +# define nssv_HAS_CPP0X _HAS_CPP0X +#else +# define nssv_HAS_CPP0X 0 +#endif + +// Unless defined otherwise below, consider VC14 as C++11 for string-view-lite: + +#if nssv_COMPILER_MSVC_VER >= 1900 +# undef nssv_CPP11_OR_GREATER +# define nssv_CPP11_OR_GREATER 1 +#endif + +#define nssv_CPP11_90 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1500) +#define nssv_CPP11_100 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1600) +#define nssv_CPP11_110 (nssv_CPP11_OR_GREATER_ || 
nssv_COMPILER_MSVC_VER >= 1700) +#define nssv_CPP11_120 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1800) +#define nssv_CPP11_140 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1900) +#define nssv_CPP11_141 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1910) + +#define nssv_CPP14_000 (nssv_CPP14_OR_GREATER) +#define nssv_CPP17_000 (nssv_CPP17_OR_GREATER) + +// Presence of C++11 language features: + +#define nssv_HAVE_CONSTEXPR_11 nssv_CPP11_140 +#define nssv_HAVE_EXPLICIT_CONVERSION nssv_CPP11_140 +#define nssv_HAVE_INLINE_NAMESPACE nssv_CPP11_140 +#define nssv_HAVE_IS_DEFAULT nssv_CPP11_140 +#define nssv_HAVE_IS_DELETE nssv_CPP11_140 +#define nssv_HAVE_NOEXCEPT nssv_CPP11_140 +#define nssv_HAVE_NULLPTR nssv_CPP11_100 +#define nssv_HAVE_REF_QUALIFIER nssv_CPP11_140 +#define nssv_HAVE_UNICODE_LITERALS nssv_CPP11_140 +#define nssv_HAVE_USER_DEFINED_LITERALS nssv_CPP11_140 +#define nssv_HAVE_WCHAR16_T nssv_CPP11_100 +#define nssv_HAVE_WCHAR32_T nssv_CPP11_100 + +#if ! ( ( nssv_CPP11_OR_GREATER && nssv_COMPILER_CLANG_VERSION ) || nssv_BETWEEN( nssv_COMPILER_CLANG_VERSION, 300, 400 ) ) +# define nssv_HAVE_STD_DEFINED_LITERALS nssv_CPP11_140 +#else +# define nssv_HAVE_STD_DEFINED_LITERALS 0 +#endif + +// Presence of C++14 language features: + +#define nssv_HAVE_CONSTEXPR_14 nssv_CPP14_000 + +// Presence of C++17 language features: + +#define nssv_HAVE_NODISCARD nssv_CPP17_000 + +// Presence of C++ library features: + +#define nssv_HAVE_STD_HASH nssv_CPP11_120 + +// Presence of compiler intrinsics: + +// Providing char-type specializations for compare() and length() that +// use compiler intrinsics can improve compile- and run-time performance. +// +// The challenge is in using the right combinations of builtin availability +// and its constexpr-ness. +// +// | compiler | __builtin_memcmp (constexpr) | memcmp (constexpr) | +// |----------|------------------------------|---------------------| +// | clang | 4.0 (>= 4.0 ) | any (? 
) | +// | clang-a | 9.0 (>= 9.0 ) | any (? ) | +// | gcc | any (constexpr) | any (? ) | +// | msvc | >= 14.2 C++17 (>= 14.2 ) | any (? ) | + +#define nssv_HAVE_BUILTIN_VER ( (nssv_CPP17_000 && nssv_COMPILER_MSVC_VERSION >= 142) || nssv_COMPILER_GNUC_VERSION > 0 || nssv_COMPILER_CLANG_VERSION >= 400 || nssv_COMPILER_APPLECLANG_VERSION >= 900 ) +#define nssv_HAVE_BUILTIN_CE ( nssv_HAVE_BUILTIN_VER ) + +#define nssv_HAVE_BUILTIN_MEMCMP ( (nssv_HAVE_CONSTEXPR_14 && nssv_HAVE_BUILTIN_CE) || !nssv_HAVE_CONSTEXPR_14 ) +#define nssv_HAVE_BUILTIN_STRLEN ( (nssv_HAVE_CONSTEXPR_11 && nssv_HAVE_BUILTIN_CE) || !nssv_HAVE_CONSTEXPR_11 ) + +#ifdef __has_builtin +# define nssv_HAVE_BUILTIN( x ) __has_builtin( x ) +#else +# define nssv_HAVE_BUILTIN( x ) 0 +#endif + +#if nssv_HAVE_BUILTIN(__builtin_memcmp) || nssv_HAVE_BUILTIN_VER +# define nssv_BUILTIN_MEMCMP __builtin_memcmp +#else +# define nssv_BUILTIN_MEMCMP memcmp +#endif + +#if nssv_HAVE_BUILTIN(__builtin_strlen) || nssv_HAVE_BUILTIN_VER +# define nssv_BUILTIN_STRLEN __builtin_strlen +#else +# define nssv_BUILTIN_STRLEN strlen +#endif + +// C++ feature usage: + +#if nssv_HAVE_CONSTEXPR_11 +# define nssv_constexpr constexpr +#else +# define nssv_constexpr /*constexpr*/ +#endif + +#if nssv_HAVE_CONSTEXPR_14 +# define nssv_constexpr14 constexpr +#else +# define nssv_constexpr14 /*constexpr*/ +#endif + +#if nssv_HAVE_EXPLICIT_CONVERSION +# define nssv_explicit explicit +#else +# define nssv_explicit /*explicit*/ +#endif + +#if nssv_HAVE_INLINE_NAMESPACE +# define nssv_inline_ns inline +#else +# define nssv_inline_ns /*inline*/ +#endif + +#if nssv_HAVE_NOEXCEPT +# define nssv_noexcept noexcept +#else +# define nssv_noexcept /*noexcept*/ +#endif + +//#if nssv_HAVE_REF_QUALIFIER +//# define nssv_ref_qual & +//# define nssv_refref_qual && +//#else +//# define nssv_ref_qual /*&*/ +//# define nssv_refref_qual /*&&*/ +//#endif + +#if nssv_HAVE_NULLPTR +# define nssv_nullptr nullptr +#else +# define nssv_nullptr NULL +#endif + +#if 
nssv_HAVE_NODISCARD +# define nssv_nodiscard [[nodiscard]] +#else +# define nssv_nodiscard /*[[nodiscard]]*/ +#endif + +// Additional includes: + +#include <algorithm> +#include <cassert> +#include <iterator> +#include <limits> +#include <string> // std::char_traits<> + +#if ! nssv_CONFIG_NO_STREAM_INSERTION +# include <ostream> +#endif + +#if ! nssv_CONFIG_NO_EXCEPTIONS +# include <stdexcept> +#endif + +#if nssv_CPP11_OR_GREATER +# include <type_traits> +#endif + +// Clang, GNUC, MSVC warning suppression macros: + +#if defined(__clang__) +# pragma clang diagnostic ignored "-Wreserved-user-defined-literal" +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wuser-defined-literals" +#elif nssv_COMPILER_GNUC_VERSION >= 480 +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wliteral-suffix" +#endif // __clang__ + +#if nssv_COMPILER_MSVC_VERSION >= 140 +# define nssv_SUPPRESS_MSGSL_WARNING(expr) [[gsl::suppress(expr)]] +# define nssv_SUPPRESS_MSVC_WARNING(code, descr) __pragma(warning(suppress: code) ) +# define nssv_DISABLE_MSVC_WARNINGS(codes) __pragma(warning(push)) __pragma(warning(disable: codes)) +#else +# define nssv_SUPPRESS_MSGSL_WARNING(expr) +# define nssv_SUPPRESS_MSVC_WARNING(code, descr) +# define nssv_DISABLE_MSVC_WARNINGS(codes) +#endif + +#if defined(__clang__) +# define nssv_RESTORE_WARNINGS() _Pragma("clang diagnostic pop") +#elif nssv_COMPILER_GNUC_VERSION >= 480 +# define nssv_RESTORE_WARNINGS() _Pragma("GCC diagnostic pop") +#elif nssv_COMPILER_MSVC_VERSION >= 140 +# define nssv_RESTORE_WARNINGS() __pragma(warning(pop )) +#else +# define nssv_RESTORE_WARNINGS() +#endif + +// Suppress the following MSVC (GSL) warnings: +// - C4455, non-gsl : 'operator ""sv': literal suffix identifiers that do not +// start with an underscore are reserved +// - C26472, gsl::t.1 : don't use a static_cast for arithmetic conversions; +// use brace initialization, gsl::narrow_cast or gsl::narrow +// - C26481: gsl::b.1 : don't use pointer arithmetic.
Use span instead + +nssv_DISABLE_MSVC_WARNINGS( 4455 26481 26472 ) +//nssv_DISABLE_CLANG_WARNINGS( "-Wuser-defined-literals" ) +//nssv_DISABLE_GNUC_WARNINGS( -Wliteral-suffix ) + +namespace nonstd { namespace sv_lite { + +// +// basic_string_view declaration: +// + +template +< + class CharT, + class Traits = std::char_traits<CharT> +> +class basic_string_view; + +namespace detail { + +// support constexpr comparison in C++14; +// for C++17 and later, use provided traits: + +template< typename CharT > +inline nssv_constexpr14 int compare( CharT const * s1, CharT const * s2, std::size_t count ) +{ + while ( count-- != 0 ) + { + if ( *s1 < *s2 ) return -1; + if ( *s1 > *s2 ) return +1; + ++s1; ++s2; + } + return 0; +} + +#if nssv_HAVE_BUILTIN_MEMCMP + +// specialization of compare() for char, see also generic compare() above: + +inline nssv_constexpr14 int compare( char const * s1, char const * s2, std::size_t count ) +{ + return nssv_BUILTIN_MEMCMP( s1, s2, count ); +} + +#endif + +#if nssv_HAVE_BUILTIN_STRLEN + +// specialization of length() for char, see also generic length() further below: + +inline nssv_constexpr std::size_t length( char const * s ) +{ + return nssv_BUILTIN_STRLEN( s ); +} + +#endif + +#if defined(__OPTIMIZE__) + +// gcc, clang provide __OPTIMIZE__ +// Expect tail call optimization to make length() non-recursive: + +template< typename CharT > +inline nssv_constexpr std::size_t length( CharT * s, std::size_t result = 0 ) +{ + return *s == '\0' ? result : length( s + 1, result + 1 ); +} + +#else // OPTIMIZE + +// non-recursive: + +template< typename CharT > +inline nssv_constexpr14 std::size_t length( CharT * s ) +{ + std::size_t result = 0; + while ( *s++ != '\0' ) + { + ++result; + } + return result; +} + +#endif // OPTIMIZE + +#if nssv_CPP11_OR_GREATER && !
nssv_CPP17_OR_GREATER +#if defined(__OPTIMIZE__) + +// gcc, clang provide __OPTIMIZE__ +// Expect tail call optimization to make search() non-recursive: + +template< class CharT, class Traits = std::char_traits<CharT> > +constexpr const CharT* search( basic_string_view<CharT, Traits> haystack, basic_string_view<CharT, Traits> needle ) +{ + return haystack.starts_with( needle ) ? haystack.begin() : + haystack.empty() ? haystack.end() : search( haystack.substr(1), needle ); +} + +#else // OPTIMIZE + +// non-recursive: + +#if nssv_CONFIG_CONSTEXPR11_STD_SEARCH + +template< class CharT, class Traits = std::char_traits<CharT> > +constexpr const CharT* search( basic_string_view<CharT, Traits> haystack, basic_string_view<CharT, Traits> needle ) +{ + return std::search( haystack.begin(), haystack.end(), needle.begin(), needle.end() ); +} + +#else // nssv_CONFIG_CONSTEXPR11_STD_SEARCH + +template< class CharT, class Traits = std::char_traits<CharT> > +nssv_constexpr14 const CharT* search( basic_string_view<CharT, Traits> haystack, basic_string_view<CharT, Traits> needle ) +{ + while ( needle.size() <= haystack.size() ) + { + if ( haystack.starts_with(needle) ) + { + return haystack.cbegin(); + } + haystack = basic_string_view<CharT, Traits>{ haystack.begin() + 1, haystack.size() - 1U }; + } + return haystack.cend(); +} +#endif // nssv_CONFIG_CONSTEXPR11_STD_SEARCH + +#endif // OPTIMIZE +#endif // nssv_CPP11_OR_GREATER && !
nssv_CPP17_OR_GREATER + +} // namespace detail + +// +// basic_string_view: +// + +template +< + class CharT, + class Traits /* = std::char_traits */ +> +class basic_string_view +{ +public: + // Member types: + + typedef Traits traits_type; + typedef CharT value_type; + + typedef CharT * pointer; + typedef CharT const * const_pointer; + typedef CharT & reference; + typedef CharT const & const_reference; + + typedef const_pointer iterator; + typedef const_pointer const_iterator; + typedef std::reverse_iterator< const_iterator > reverse_iterator; + typedef std::reverse_iterator< const_iterator > const_reverse_iterator; + + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + + // 24.4.2.1 Construction and assignment: + + nssv_constexpr basic_string_view() nssv_noexcept + : data_( nssv_nullptr ) + , size_( 0 ) + {} + +#if nssv_CPP11_OR_GREATER + nssv_constexpr basic_string_view( basic_string_view const & other ) nssv_noexcept = default; +#else + nssv_constexpr basic_string_view( basic_string_view const & other ) nssv_noexcept + : data_( other.data_) + , size_( other.size_) + {} +#endif + + nssv_constexpr basic_string_view( CharT const * s, size_type count ) nssv_noexcept // non-standard noexcept + : data_( s ) + , size_( count ) + {} + + nssv_constexpr basic_string_view( CharT const * s) nssv_noexcept // non-standard noexcept + : data_( s ) +#if nssv_CPP17_OR_GREATER + , size_( Traits::length(s) ) +#elif nssv_CPP11_OR_GREATER + , size_( detail::length(s) ) +#else + , size_( Traits::length(s) ) +#endif + {} + +#if nssv_HAVE_NULLPTR +# if nssv_HAVE_IS_DELETE + nssv_constexpr basic_string_view( std::nullptr_t ) nssv_noexcept = delete; +# else + private: nssv_constexpr basic_string_view( std::nullptr_t ) nssv_noexcept; public: +# endif +#endif + + // Assignment: + +#if nssv_CPP11_OR_GREATER + nssv_constexpr14 basic_string_view & operator=( basic_string_view const & other ) nssv_noexcept = default; +#else + nssv_constexpr14 basic_string_view & 
operator=( basic_string_view const & other ) nssv_noexcept + { + data_ = other.data_; + size_ = other.size_; + return *this; + } +#endif + + // 24.4.2.2 Iterator support: + + nssv_constexpr const_iterator begin() const nssv_noexcept { return data_; } + nssv_constexpr const_iterator end() const nssv_noexcept { return data_ + size_; } + + nssv_constexpr const_iterator cbegin() const nssv_noexcept { return begin(); } + nssv_constexpr const_iterator cend() const nssv_noexcept { return end(); } + + nssv_constexpr const_reverse_iterator rbegin() const nssv_noexcept { return const_reverse_iterator( end() ); } + nssv_constexpr const_reverse_iterator rend() const nssv_noexcept { return const_reverse_iterator( begin() ); } + + nssv_constexpr const_reverse_iterator crbegin() const nssv_noexcept { return rbegin(); } + nssv_constexpr const_reverse_iterator crend() const nssv_noexcept { return rend(); } + + // 24.4.2.3 Capacity: + + nssv_constexpr size_type size() const nssv_noexcept { return size_; } + nssv_constexpr size_type length() const nssv_noexcept { return size_; } + nssv_constexpr size_type max_size() const nssv_noexcept { return (std::numeric_limits< size_type >::max)(); } + + // since C++20 + nssv_nodiscard nssv_constexpr bool empty() const nssv_noexcept + { + return 0 == size_; + } + + // 24.4.2.4 Element access: + + nssv_constexpr const_reference operator[]( size_type pos ) const + { + return data_at( pos ); + } + + nssv_constexpr14 const_reference at( size_type pos ) const + { +#if nssv_CONFIG_NO_EXCEPTIONS + assert( pos < size() ); +#else + if ( pos >= size() ) + { + throw std::out_of_range("nonstd::string_view::at()"); + } +#endif + return data_at( pos ); + } + + nssv_constexpr const_reference front() const { return data_at( 0 ); } + nssv_constexpr const_reference back() const { return data_at( size() - 1 ); } + + nssv_constexpr const_pointer data() const nssv_noexcept { return data_; } + + // 24.4.2.5 Modifiers: + + nssv_constexpr14 void remove_prefix( 
size_type n ) + { + assert( n <= size() ); + data_ += n; + size_ -= n; + } + + nssv_constexpr14 void remove_suffix( size_type n ) + { + assert( n <= size() ); + size_ -= n; + } + + nssv_constexpr14 void swap( basic_string_view & other ) nssv_noexcept + { + const basic_string_view tmp(other); + other = *this; + *this = tmp; + } + + // 24.4.2.6 String operations: + + size_type copy( CharT * dest, size_type n, size_type pos = 0 ) const + { +#if nssv_CONFIG_NO_EXCEPTIONS + assert( pos <= size() ); +#else + if ( pos > size() ) + { + throw std::out_of_range("nonstd::string_view::copy()"); + } +#endif + const size_type rlen = (std::min)( n, size() - pos ); + + (void) Traits::copy( dest, data() + pos, rlen ); + + return rlen; + } + + nssv_constexpr14 basic_string_view substr( size_type pos = 0, size_type n = npos ) const + { +#if nssv_CONFIG_NO_EXCEPTIONS + assert( pos <= size() ); +#else + if ( pos > size() ) + { + throw std::out_of_range("nonstd::string_view::substr()"); + } +#endif + return basic_string_view( data() + pos, (std::min)( n, size() - pos ) ); + } + + // compare(), 6x: + + nssv_constexpr14 int compare( basic_string_view other ) const nssv_noexcept // (1) + { +#if nssv_CPP17_OR_GREATER + if ( const int result = Traits::compare( data(), other.data(), (std::min)( size(), other.size() ) ) ) +#else + if ( const int result = detail::compare( data(), other.data(), (std::min)( size(), other.size() ) ) ) +#endif + { + return result; + } + + return size() == other.size() ? 0 : size() < other.size() ? 
-1 : 1; + } + + nssv_constexpr int compare( size_type pos1, size_type n1, basic_string_view other ) const // (2) + { + return substr( pos1, n1 ).compare( other ); + } + + nssv_constexpr int compare( size_type pos1, size_type n1, basic_string_view other, size_type pos2, size_type n2 ) const // (3) + { + return substr( pos1, n1 ).compare( other.substr( pos2, n2 ) ); + } + + nssv_constexpr int compare( CharT const * s ) const // (4) + { + return compare( basic_string_view( s ) ); + } + + nssv_constexpr int compare( size_type pos1, size_type n1, CharT const * s ) const // (5) + { + return substr( pos1, n1 ).compare( basic_string_view( s ) ); + } + + nssv_constexpr int compare( size_type pos1, size_type n1, CharT const * s, size_type n2 ) const // (6) + { + return substr( pos1, n1 ).compare( basic_string_view( s, n2 ) ); + } + + // 24.4.2.7 Searching: + + // starts_with(), 3x, since C++20: + + nssv_constexpr bool starts_with( basic_string_view v ) const nssv_noexcept // (1) + { + return size() >= v.size() && compare( 0, v.size(), v ) == 0; + } + + nssv_constexpr bool starts_with( CharT c ) const nssv_noexcept // (2) + { + return starts_with( basic_string_view( &c, 1 ) ); + } + + nssv_constexpr bool starts_with( CharT const * s ) const // (3) + { + return starts_with( basic_string_view( s ) ); + } + + // ends_with(), 3x, since C++20: + + nssv_constexpr bool ends_with( basic_string_view v ) const nssv_noexcept // (1) + { + return size() >= v.size() && compare( size() - v.size(), npos, v ) == 0; + } + + nssv_constexpr bool ends_with( CharT c ) const nssv_noexcept // (2) + { + return ends_with( basic_string_view( &c, 1 ) ); + } + + nssv_constexpr bool ends_with( CharT const * s ) const // (3) + { + return ends_with( basic_string_view( s ) ); + } + + // find(), 4x: + + nssv_constexpr14 size_type find( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) + { + return assert( v.size() == 0 || v.data() != nssv_nullptr ) + , pos >= size() + ? 
npos : to_pos( +#if nssv_CPP11_OR_GREATER && ! nssv_CPP17_OR_GREATER + detail::search( substr(pos), v ) +#else + std::search( cbegin() + pos, cend(), v.cbegin(), v.cend(), Traits::eq ) +#endif + ); + } + + nssv_constexpr size_type find( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) + { + return find( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find( CharT const * s, size_type pos, size_type n ) const // (3) + { + return find( basic_string_view( s, n ), pos ); + } + + nssv_constexpr size_type find( CharT const * s, size_type pos = 0 ) const // (4) + { + return find( basic_string_view( s ), pos ); + } + + // rfind(), 4x: + + nssv_constexpr14 size_type rfind( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) + { + if ( size() < v.size() ) + { + return npos; + } + + if ( v.empty() ) + { + return (std::min)( size(), pos ); + } + + const_iterator last = cbegin() + (std::min)( size() - v.size(), pos ) + v.size(); + const_iterator result = std::find_end( cbegin(), last, v.cbegin(), v.cend(), Traits::eq ); + + return result != last ? size_type( result - cbegin() ) : npos; + } + + nssv_constexpr14 size_type rfind( CharT c, size_type pos = npos ) const nssv_noexcept // (2) + { + return rfind( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr14 size_type rfind( CharT const * s, size_type pos, size_type n ) const // (3) + { + return rfind( basic_string_view( s, n ), pos ); + } + + nssv_constexpr14 size_type rfind( CharT const * s, size_type pos = npos ) const // (4) + { + return rfind( basic_string_view( s ), pos ); + } + + // find_first_of(), 4x: + + nssv_constexpr size_type find_first_of( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) + { + return pos >= size() + ? 
npos + : to_pos( std::find_first_of( cbegin() + pos, cend(), v.cbegin(), v.cend(), Traits::eq ) ); + } + + nssv_constexpr size_type find_first_of( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) + { + return find_first_of( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find_first_of( CharT const * s, size_type pos, size_type n ) const // (3) + { + return find_first_of( basic_string_view( s, n ), pos ); + } + + nssv_constexpr size_type find_first_of( CharT const * s, size_type pos = 0 ) const // (4) + { + return find_first_of( basic_string_view( s ), pos ); + } + + // find_last_of(), 4x: + + nssv_constexpr size_type find_last_of( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) + { + return empty() + ? npos + : pos >= size() + ? find_last_of( v, size() - 1 ) + : to_pos( std::find_first_of( const_reverse_iterator( cbegin() + pos + 1 ), crend(), v.cbegin(), v.cend(), Traits::eq ) ); + } + + nssv_constexpr size_type find_last_of( CharT c, size_type pos = npos ) const nssv_noexcept // (2) + { + return find_last_of( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find_last_of( CharT const * s, size_type pos, size_type count ) const // (3) + { + return find_last_of( basic_string_view( s, count ), pos ); + } + + nssv_constexpr size_type find_last_of( CharT const * s, size_type pos = npos ) const // (4) + { + return find_last_of( basic_string_view( s ), pos ); + } + + // find_first_not_of(), 4x: + + nssv_constexpr size_type find_first_not_of( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) + { + return pos >= size() + ? 
npos + : to_pos( std::find_if( cbegin() + pos, cend(), not_in_view( v ) ) ); + } + + nssv_constexpr size_type find_first_not_of( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) + { + return find_first_not_of( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find_first_not_of( CharT const * s, size_type pos, size_type count ) const // (3) + { + return find_first_not_of( basic_string_view( s, count ), pos ); + } + + nssv_constexpr size_type find_first_not_of( CharT const * s, size_type pos = 0 ) const // (4) + { + return find_first_not_of( basic_string_view( s ), pos ); + } + + // find_last_not_of(), 4x: + + nssv_constexpr size_type find_last_not_of( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) + { + return empty() + ? npos + : pos >= size() + ? find_last_not_of( v, size() - 1 ) + : to_pos( std::find_if( const_reverse_iterator( cbegin() + pos + 1 ), crend(), not_in_view( v ) ) ); + } + + nssv_constexpr size_type find_last_not_of( CharT c, size_type pos = npos ) const nssv_noexcept // (2) + { + return find_last_not_of( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find_last_not_of( CharT const * s, size_type pos, size_type count ) const // (3) + { + return find_last_not_of( basic_string_view( s, count ), pos ); + } + + nssv_constexpr size_type find_last_not_of( CharT const * s, size_type pos = npos ) const // (4) + { + return find_last_not_of( basic_string_view( s ), pos ); + } + + // Constants: + +#if nssv_CPP17_OR_GREATER + static nssv_constexpr size_type npos = size_type(-1); +#elif nssv_CPP11_OR_GREATER + enum : size_type { npos = size_type(-1) }; +#else + enum { npos = size_type(-1) }; +#endif + +private: + struct not_in_view + { + const basic_string_view v; + + nssv_constexpr explicit not_in_view( basic_string_view v_ ) : v( v_ ) {} + + nssv_constexpr bool operator()( CharT c ) const + { + return npos == v.find_first_of( c ); + } + }; + + nssv_constexpr size_type to_pos( const_iterator it 
) const + { + return it == cend() ? npos : size_type( it - cbegin() ); + } + + nssv_constexpr size_type to_pos( const_reverse_iterator it ) const + { + return it == crend() ? npos : size_type( crend() - it - 1 ); + } + + nssv_constexpr const_reference data_at( size_type pos ) const + { +#if nssv_BETWEEN( nssv_COMPILER_GNUC_VERSION, 1, 500 ) + return data_[pos]; +#else + return assert( pos < size() ), data_[pos]; +#endif + } + +private: + const_pointer data_; + size_type size_; + +public: +#if nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS + + template< class Allocator > + basic_string_view( std::basic_string const & s ) nssv_noexcept + : data_( s.data() ) + , size_( s.size() ) + {} + +#if nssv_HAVE_EXPLICIT_CONVERSION + + template< class Allocator > + explicit operator std::basic_string() const + { + return to_string( Allocator() ); + } + +#endif // nssv_HAVE_EXPLICIT_CONVERSION + +#if nssv_CPP11_OR_GREATER + + template< class Allocator = std::allocator > + std::basic_string + to_string( Allocator const & a = Allocator() ) const + { + return std::basic_string( begin(), end(), a ); + } + +#else + + std::basic_string + to_string() const + { + return std::basic_string( begin(), end() ); + } + + template< class Allocator > + std::basic_string + to_string( Allocator const & a ) const + { + return std::basic_string( begin(), end(), a ); + } + +#endif // nssv_CPP11_OR_GREATER + +#endif // nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS +}; + +// +// Non-member functions: +// + +// 24.4.3 Non-member comparison functions: +// lexicographically compare two string views (function template): + +template< class CharT, class Traits > +nssv_constexpr bool operator== ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +template< class CharT, class Traits > +nssv_constexpr bool operator!= ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< 
class CharT, class Traits > +nssv_constexpr bool operator< ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +template< class CharT, class Traits > +nssv_constexpr bool operator<= ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +template< class CharT, class Traits > +nssv_constexpr bool operator> ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0; } + +template< class CharT, class Traits > +nssv_constexpr bool operator>= ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +// Let S be basic_string_view, and sv be an instance of S. +// Implementations shall provide sufficient additional overloads marked +// constexpr and noexcept so that an object t with an implicit conversion +// to S can be compared according to Table 67. + +#if ! nssv_CPP11_OR_GREATER || nssv_BETWEEN( nssv_COMPILER_MSVC_VERSION, 100, 141 ) + +// accommodate for older compilers: + +// == + +template< class CharT, class Traits> +nssv_constexpr bool operator==( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.size() == detail::length( rhs ) && lhs.compare( rhs ) == 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator==( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return detail::length( lhs ) == rhs.size() && rhs.compare( lhs ) == 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator==( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator==( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +// != + +template< class CharT, class Traits> +nssv_constexpr bool operator!=( + 
basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< class CharT, class Traits> +nssv_constexpr bool operator!=( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< class CharT, class Traits> +nssv_constexpr bool operator!=( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< class CharT, class Traits> +nssv_constexpr bool operator!=( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +// < + +template< class CharT, class Traits> +nssv_constexpr bool operator<( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return rhs.compare( lhs ) > 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return rhs.compare( lhs ) > 0; } + +// <= + +template< class CharT, class Traits> +nssv_constexpr bool operator<=( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<=( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return rhs.compare( lhs ) >= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<=( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<=( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return rhs.compare( lhs ) >= 0; } + +// > + +template< 
class CharT, class Traits> +nssv_constexpr bool operator>( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return rhs.compare( lhs ) < 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return rhs.compare( lhs ) < 0; } + +// >= + +template< class CharT, class Traits> +nssv_constexpr bool operator>=( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>=( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return rhs.compare( lhs ) <= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>=( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>=( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return rhs.compare( lhs ) <= 0; } + +#else // newer compilers: + +#define nssv_BASIC_STRING_VIEW_I(T,U) typename std::decay< basic_string_view >::type + +#if defined(_MSC_VER) // issue 40 +# define nssv_MSVC_ORDER(x) , int=x +#else +# define nssv_MSVC_ORDER(x) /*, int=x*/ +#endif + +// == + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator==( + basic_string_view lhs, + nssv_BASIC_STRING_VIEW_I(CharT, Traits) rhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator==( + nssv_BASIC_STRING_VIEW_I(CharT, 
Traits) lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +// != + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator!= ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator!= ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +// < + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator< ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator< ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +// <= + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator<= ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator<= ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +// > + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator> ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator> ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ 
return lhs.compare( rhs ) > 0; } + +// >= + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator>= ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator>= ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +#undef nssv_MSVC_ORDER +#undef nssv_BASIC_STRING_VIEW_I + +#endif // compiler-dependent approach to comparisons + +// 24.4.4 Inserters and extractors: + +#if ! nssv_CONFIG_NO_STREAM_INSERTION + +namespace detail { + +template< class Stream > +void write_padding( Stream & os, std::streamsize n ) +{ + for ( std::streamsize i = 0; i < n; ++i ) + os.rdbuf()->sputc( os.fill() ); +} + +template< class Stream, class View > +Stream & write_to_stream( Stream & os, View const & sv ) +{ + typename Stream::sentry sentry( os ); + + if ( !sentry ) + return os; + + const std::streamsize length = static_cast( sv.length() ); + + // Whether, and how, to pad: + const bool pad = ( length < os.width() ); + const bool left_pad = pad && ( os.flags() & std::ios_base::adjustfield ) == std::ios_base::right; + + if ( left_pad ) + write_padding( os, os.width() - length ); + + // Write span characters: + os.rdbuf()->sputn( sv.begin(), length ); + + if ( pad && !left_pad ) + write_padding( os, os.width() - length ); + + // Reset output stream width: + os.width( 0 ); + + return os; +} + +} // namespace detail + +template< class CharT, class Traits > +std::basic_ostream & +operator<<( + std::basic_ostream& os, + basic_string_view sv ) +{ + return detail::write_to_stream( os, sv ); +} + +#endif // nssv_CONFIG_NO_STREAM_INSERTION + +// Several typedefs for common character types are provided: + +typedef basic_string_view string_view; +typedef basic_string_view wstring_view; +#if 
nssv_HAVE_WCHAR16_T +typedef basic_string_view u16string_view; +typedef basic_string_view u32string_view; +#endif + +}} // namespace nonstd::sv_lite + +// +// 24.4.6 Suffix for basic_string_view literals: +// + +#if nssv_HAVE_USER_DEFINED_LITERALS + +namespace nonstd { +nssv_inline_ns namespace literals { +nssv_inline_ns namespace string_view_literals { + +#if nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS + +nssv_constexpr nonstd::sv_lite::string_view operator "" sv( const char* str, size_t len ) nssv_noexcept // (1) +{ + return nonstd::sv_lite::string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::u16string_view operator "" sv( const char16_t* str, size_t len ) nssv_noexcept // (2) +{ + return nonstd::sv_lite::u16string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::u32string_view operator "" sv( const char32_t* str, size_t len ) nssv_noexcept // (3) +{ + return nonstd::sv_lite::u32string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) +{ + return nonstd::sv_lite::wstring_view{ str, len }; +} + +#endif // nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS + +#if nssv_CONFIG_USR_SV_OPERATOR + +nssv_constexpr nonstd::sv_lite::string_view operator "" _sv( const char* str, size_t len ) nssv_noexcept // (1) +{ + return nonstd::sv_lite::string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::u16string_view operator "" _sv( const char16_t* str, size_t len ) nssv_noexcept // (2) +{ + return nonstd::sv_lite::u16string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::u32string_view operator "" _sv( const char32_t* str, size_t len ) nssv_noexcept // (3) +{ + return nonstd::sv_lite::u32string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::wstring_view operator "" _sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) +{ + return nonstd::sv_lite::wstring_view{ str, len }; +} + +#endif // nssv_CONFIG_USR_SV_OPERATOR + 
+}}} // namespace nonstd::literals::string_view_literals + +#endif + +// +// Extensions for std::string: +// + +#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS + +namespace nonstd { +namespace sv_lite { + +// Exclude MSVC 14 (19.00): it yields ambiguous to_string(): + +#if nssv_CPP11_OR_GREATER && nssv_COMPILER_MSVC_VERSION != 140 + +template< class CharT, class Traits, class Allocator = std::allocator > +std::basic_string +to_string( basic_string_view v, Allocator const & a = Allocator() ) +{ + return std::basic_string( v.begin(), v.end(), a ); +} + +#else + +template< class CharT, class Traits > +std::basic_string +to_string( basic_string_view v ) +{ + return std::basic_string( v.begin(), v.end() ); +} + +template< class CharT, class Traits, class Allocator > +std::basic_string +to_string( basic_string_view v, Allocator const & a ) +{ + return std::basic_string( v.begin(), v.end(), a ); +} + +#endif // nssv_CPP11_OR_GREATER + +template< class CharT, class Traits, class Allocator > +basic_string_view +to_string_view( std::basic_string const & s ) +{ + return basic_string_view( s.data(), s.size() ); +} + +}} // namespace nonstd::sv_lite + +#endif // nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS + +// +// make types and algorithms available in namespace nonstd: +// + +namespace nonstd { + +using sv_lite::basic_string_view; +using sv_lite::string_view; +using sv_lite::wstring_view; + +#if nssv_HAVE_WCHAR16_T +using sv_lite::u16string_view; +#endif +#if nssv_HAVE_WCHAR32_T +using sv_lite::u32string_view; +#endif + +// literal "sv" + +using sv_lite::operator==; +using sv_lite::operator!=; +using sv_lite::operator<; +using sv_lite::operator<=; +using sv_lite::operator>; +using sv_lite::operator>=; + +#if ! 
nssv_CONFIG_NO_STREAM_INSERTION +using sv_lite::operator<<; +#endif + +#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS +using sv_lite::to_string; +using sv_lite::to_string_view; +#endif + +} // namespace nonstd + +// 24.4.5 Hash support (C++11): + +// Note: The hash value of a string view object is equal to the hash value of +// the corresponding string object. + +#if nssv_HAVE_STD_HASH + +#include + +namespace std { + +template<> +struct hash< nonstd::string_view > +{ +public: + std::size_t operator()( nonstd::string_view v ) const nssv_noexcept + { + return std::hash()( std::string( v.data(), v.size() ) ); + } +}; + +template<> +struct hash< nonstd::wstring_view > +{ +public: + std::size_t operator()( nonstd::wstring_view v ) const nssv_noexcept + { + return std::hash()( std::wstring( v.data(), v.size() ) ); + } +}; + +template<> +struct hash< nonstd::u16string_view > +{ +public: + std::size_t operator()( nonstd::u16string_view v ) const nssv_noexcept + { + return std::hash()( std::u16string( v.data(), v.size() ) ); + } +}; + +template<> +struct hash< nonstd::u32string_view > +{ +public: + std::size_t operator()( nonstd::u32string_view v ) const nssv_noexcept + { + return std::hash()( std::u32string( v.data(), v.size() ) ); + } +}; + +} // namespace std + +#endif // nssv_HAVE_STD_HASH + +nssv_RESTORE_WARNINGS() + +#endif // nssv_HAVE_STD_STRING_VIEW +#endif // NONSTD_SV_LITE_H_INCLUDED +/* end file simdjson/nonstd/string_view.hpp */ +SIMDJSON_POP_DISABLE_WARNINGS + +namespace std { + using string_view = nonstd::string_view; +} +#endif // SIMDJSON_HAS_STRING_VIEW +#undef SIMDJSON_HAS_STRING_VIEW // We are not going to need this macro anymore. + +/// If EXPR is an error, returns it. +#define SIMDJSON_TRY(EXPR) { auto _err = (EXPR); if (_err) { return _err; } } + +// Unless the programmer has already set SIMDJSON_DEVELOPMENT_CHECKS, +// we want to set it under debug builds. We detect a debug build +// under Visual Studio when the _DEBUG macro is set. 
Under the other +// compilers, we use the fact that they define __OPTIMIZE__ whenever +// they allow optimizations. +// It is possible that this could miss some cases where SIMDJSON_DEVELOPMENT_CHECKS +// is helpful, but the programmer can set the macro SIMDJSON_DEVELOPMENT_CHECKS. +// It could also wrongly set SIMDJSON_DEVELOPMENT_CHECKS (e.g., if the programmer +// sets _DEBUG in a release build under Visual Studio, or if some compiler fails to +// set the __OPTIMIZE__ macro). +#ifndef SIMDJSON_DEVELOPMENT_CHECKS +#ifdef _MSC_VER +// Visual Studio seems to set _DEBUG for debug builds. +#ifdef _DEBUG +#define SIMDJSON_DEVELOPMENT_CHECKS 1 +#endif // _DEBUG +#else // _MSC_VER +// All other compilers appear to set __OPTIMIZE__ to a positive integer +// when the compiler is optimizing. +#ifndef __OPTIMIZE__ +#define SIMDJSON_DEVELOPMENT_CHECKS 1 +#endif // __OPTIMIZE__ +#endif // _MSC_VER +#endif // SIMDJSON_DEVELOPMENT_CHECKS + +// The SIMDJSON_CHECK_EOF macro is a feature flag for the "don't require padding" +// feature. 
+ +#if SIMDJSON_CPLUSPLUS17 +// if we have C++, then fallthrough is a default attribute +# define simdjson_fallthrough [[fallthrough]] +// check if we have __attribute__ support +#elif defined(__has_attribute) +// check if we have the __fallthrough__ attribute +#if __has_attribute(__fallthrough__) +// we are good to go: +# define simdjson_fallthrough __attribute__((__fallthrough__)) +#endif // __has_attribute(__fallthrough__) +#endif // SIMDJSON_CPLUSPLUS17 +// on some systems, we simply do not have support for fallthrough, so use a default: +#ifndef simdjson_fallthrough +# define simdjson_fallthrough do {} while (0) /* fallthrough */ +#endif // simdjson_fallthrough + +#if SIMDJSON_DEVELOPMENT_CHECKS +#define SIMDJSON_DEVELOPMENT_ASSERT(expr) do { assert ((expr)); } while (0) +#else +#define SIMDJSON_DEVELOPMENT_ASSERT(expr) do { } while (0) +#endif + +#ifndef SIMDJSON_UTF8VALIDATION +#define SIMDJSON_UTF8VALIDATION 1 +#endif + +#ifdef __has_include +// How do we detect that a compiler supports vbmi2? +// For sure if the following header is found, we are ok? +#if __has_include() +#define SIMDJSON_COMPILER_SUPPORTS_VBMI2 1 +#endif +#endif + +#ifdef _MSC_VER +#if _MSC_VER >= 1920 +// Visual Studio 2019 and up support VBMI2 under x64 even if the header +// avx512vbmi2intrin.h is not found. +#define SIMDJSON_COMPILER_SUPPORTS_VBMI2 1 +#endif +#endif + +// By default, we allow AVX512. +#ifndef SIMDJSON_AVX512_ALLOWED +#define SIMDJSON_AVX512_ALLOWED 1 +#endif + +#endif // SIMDJSON_COMMON_DEFS_H +/* end file simdjson/common_defs.h */ +/* skipped duplicate #include "simdjson/compiler_check.h" */ +/* including simdjson/error.h: #include "simdjson/error.h" */ +/* begin file simdjson/error.h */ +#ifndef SIMDJSON_ERROR_H +#define SIMDJSON_ERROR_H + +/* skipped duplicate #include "simdjson/base.h" */ + +#include +#include + +namespace simdjson { + +/** + * All possible errors returned by simdjson. 
These error codes are subject to change + * and not all simdjson kernel returns the same error code given the same input: it is not + * well defined which error a given input should produce. + * + * Only SUCCESS evaluates to false as a Boolean. All other error codes will evaluate + * to true as a Boolean. + */ +enum error_code { + SUCCESS = 0, ///< No error + CAPACITY, ///< This parser can't support a document that big + MEMALLOC, ///< Error allocating memory, most likely out of memory + TAPE_ERROR, ///< Something went wrong, this is a generic error + DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation + STRING_ERROR, ///< Problem while parsing a string + T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 't' + F_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'f' + N_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'n' + NUMBER_ERROR, ///< Problem while parsing a number + BIGINT_ERROR, ///< The integer value exceeds 64 bits + UTF8_ERROR, ///< the input is not valid UTF-8 + UNINITIALIZED, ///< unknown error, or uninitialized document + EMPTY, ///< no structural element found + UNESCAPED_CHARS, ///< found unescaped characters in a string. + UNCLOSED_STRING, ///< missing quote at the end + UNSUPPORTED_ARCHITECTURE, ///< unsupported architecture + INCORRECT_TYPE, ///< JSON element has a different type than user expected + NUMBER_OUT_OF_RANGE, ///< JSON number does not fit in 64 bits + INDEX_OUT_OF_BOUNDS, ///< JSON array index too large + NO_SUCH_FIELD, ///< JSON field not found in object + IO_ERROR, ///< Error reading a file + INVALID_JSON_POINTER, ///< Invalid JSON pointer syntax + INVALID_URI_FRAGMENT, ///< Invalid URI fragment + UNEXPECTED_ERROR, ///< indicative of a bug in simdjson + PARSER_IN_USE, ///< parser is already in use. 
+ OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order (checked when SIMDJSON_DEVELOPMENT_CHECKS=1) + INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for simdjson to safely parse it. + INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. + SCALAR_DOCUMENT_AS_VALUE, ///< A scalar document is treated as a value. + OUT_OF_BOUNDS, ///< Attempted to access location outside of document. + TRAILING_CONTENT, ///< Unexpected trailing content in the JSON input + NUM_ERROR_CODES +}; + +/** + * It is the convention throughout the code that the macro SIMDJSON_DEVELOPMENT_CHECKS determines whether + * we check for OUT_OF_ORDER_ITERATION. The logic behind it is that these errors only occurs when the code + * that was written while breaking some simdjson::ondemand requirement. They should not occur in released + * code after these issues were fixed. + */ + +/** + * Get the error message for the given error code. + * + * dom::parser parser; + * dom::element doc; + * auto error = parser.parse("foo",3).get(doc); + * if (error) { printf("Error: %s\n", error_message(error)); } + * + * @return The error message. + */ +inline const char *error_message(error_code error) noexcept; + +/** + * Write the error message to the output stream + */ +inline std::ostream& operator<<(std::ostream& out, error_code error) noexcept; + +/** + * Exception thrown when an exception-supporting simdjson method is called + */ +struct simdjson_error : public std::exception { + /** + * Create an exception from a simdjson error code. + * @param error The error code + */ + simdjson_error(error_code error) noexcept : _error{error} { } + /** The error message */ + const char *what() const noexcept { return error_message(error()); } + /** The error code */ + error_code error() const noexcept { return _error; } +private: + /** The error code that was used */ + error_code _error; +}; + +namespace internal { + +/** + * The result of a simdjson operation that could fail. 
+ * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::simdjson_result_base { + * simdjson_result() noexcept : internal::simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct simdjson_result_base : protected std::pair { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline simdjson_result_base() noexcept; + + /** + * Create a new error result. + */ + simdjson_inline simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. 
+ */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; + +}; // struct simdjson_result_base + +} // namespace internal + +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + */ +template +struct simdjson_result : public internal::simdjson_result_base { + /** + * @private Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline simdjson_result() noexcept; + /** + * @private Create a new successful result. + */ + simdjson_inline simdjson_result(T &&value) noexcept; + /** + * @private Create a new error result. + */ + simdjson_inline simdjson_result(error_code error_code) noexcept; + /** + * @private Create a new result with both things (use if you don't want to branch when creating the result). 
+ */ + simdjson_inline simdjson_result(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_warn_unused simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. 
+ */ + simdjson_inline T&& value_unsafe() && noexcept; + +}; // struct simdjson_result + +#if SIMDJSON_EXCEPTIONS + +template +inline std::ostream& operator<<(std::ostream& out, simdjson_result value) { return out << value.value(); } +#endif // SIMDJSON_EXCEPTIONS + +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +/** + * @deprecated This is an alias and will be removed, use error_code instead + */ +using ErrorValues [[deprecated("This is an alias and will be removed, use error_code instead")]] = error_code; + +/** + * @deprecated Error codes should be stored and returned as `error_code`, use `error_message()` instead. + */ +[[deprecated("Error codes should be stored and returned as `error_code`, use `error_message()` instead.")]] +inline const std::string error_message(int error) noexcept; +#endif // SIMDJSON_DISABLE_DEPRECATED_API +} // namespace simdjson + +#endif // SIMDJSON_ERROR_H +/* end file simdjson/error.h */ +/* skipped duplicate #include "simdjson/portability.h" */ + +/** + * @brief The top level simdjson namespace, containing everything the library provides. + */ +namespace simdjson { + +SIMDJSON_PUSH_DISABLE_UNUSED_WARNINGS + +/** The maximum document size supported by simdjson. */ +constexpr size_t SIMDJSON_MAXSIZE_BYTES = 0xFFFFFFFF; + +/** + * The amount of padding needed in a buffer to parse JSON. + * + * The input buf should be readable up to buf + SIMDJSON_PADDING + * this is a stopgap; there should be a better description of the + * main loop and its behavior that abstracts over this + * See https://github.com/simdjson/simdjson/issues/174 + */ +constexpr size_t SIMDJSON_PADDING = 64; + +/** + * By default, simdjson supports this many nested objects and arrays. + * + * This is the default for parser::max_depth(). 
+ */ +constexpr size_t DEFAULT_MAX_DEPTH = 1024; + +SIMDJSON_POP_DISABLE_UNUSED_WARNINGS + +class implementation; +struct padded_string; +class padded_string_view; +enum class stage1_mode; + +namespace internal { + +template +class atomic_ptr; +class dom_parser_implementation; +class escape_json_string; +class tape_ref; +struct value128; +enum class tape_type; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_BASE_H +/* end file simdjson/base.h */ + +#endif // SIMDJSON_SRC_BASE_H +/* end file base.h */ + +SIMDJSON_PUSH_DISABLE_UNUSED_WARNINGS + +/* including to_chars.cpp: #include */ +/* begin file to_chars.cpp */ +#ifndef SIMDJSON_SRC_TO_CHARS_CPP +#define SIMDJSON_SRC_TO_CHARS_CPP + +/* skipped duplicate #include */ + +#include +#include +#include +#include + +namespace simdjson { +namespace internal { +/*! +implements the Grisu2 algorithm for binary to decimal floating-point +conversion. +Adapted from JSON for Modern C++ + +This implementation is a slightly modified version of the reference +implementation which may be obtained from +http://florian.loitsch.com/publications (bench.tar.gz). +The code is distributed under the MIT license, Copyright (c) 2009 Florian +Loitsch. 
For a detailed description of the algorithm see: [1] Loitsch, "Printing +Floating-Point Numbers Quickly and Accurately with Integers", Proceedings of the +ACM SIGPLAN 2010 Conference on Programming Language Design and Implementation, +PLDI 2010 [2] Burger, Dybvig, "Printing Floating-Point Numbers Quickly and +Accurately", Proceedings of the ACM SIGPLAN 1996 Conference on Programming +Language Design and Implementation, PLDI 1996 +*/ +namespace dtoa_impl { + +template +Target reinterpret_bits(const Source source) { + static_assert(sizeof(Target) == sizeof(Source), "size mismatch"); + + Target target; + std::memcpy(&target, &source, sizeof(Source)); + return target; +} + +struct diyfp // f * 2^e +{ + static constexpr int kPrecision = 64; // = q + + std::uint64_t f = 0; + int e = 0; + + constexpr diyfp(std::uint64_t f_, int e_) noexcept : f(f_), e(e_) {} + + /*! + @brief returns x - y + @pre x.e == y.e and x.f >= y.f + */ + static diyfp sub(const diyfp &x, const diyfp &y) noexcept { + + return {x.f - y.f, x.e}; + } + + /*! + @brief returns x * y + @note The result is rounded. (Only the upper q bits are returned.) 
+ */ + static diyfp mul(const diyfp &x, const diyfp &y) noexcept { + static_assert(kPrecision == 64, "internal error"); + + // Computes: + // f = round((x.f * y.f) / 2^q) + // e = x.e + y.e + q + + // Emulate the 64-bit * 64-bit multiplication: + // + // p = u * v + // = (u_lo + 2^32 u_hi) (v_lo + 2^32 v_hi) + // = (u_lo v_lo ) + 2^32 ((u_lo v_hi ) + (u_hi v_lo )) + + // 2^64 (u_hi v_hi ) = (p0 ) + 2^32 ((p1 ) + (p2 )) + // + 2^64 (p3 ) = (p0_lo + 2^32 p0_hi) + 2^32 ((p1_lo + + // 2^32 p1_hi) + (p2_lo + 2^32 p2_hi)) + 2^64 (p3 ) = + // (p0_lo ) + 2^32 (p0_hi + p1_lo + p2_lo ) + 2^64 (p1_hi + + // p2_hi + p3) = (p0_lo ) + 2^32 (Q ) + 2^64 (H ) = (p0_lo ) + + // 2^32 (Q_lo + 2^32 Q_hi ) + 2^64 (H ) + // + // (Since Q might be larger than 2^32 - 1) + // + // = (p0_lo + 2^32 Q_lo) + 2^64 (Q_hi + H) + // + // (Q_hi + H does not overflow a 64-bit int) + // + // = p_lo + 2^64 p_hi + + const std::uint64_t u_lo = x.f & 0xFFFFFFFFu; + const std::uint64_t u_hi = x.f >> 32u; + const std::uint64_t v_lo = y.f & 0xFFFFFFFFu; + const std::uint64_t v_hi = y.f >> 32u; + + const std::uint64_t p0 = u_lo * v_lo; + const std::uint64_t p1 = u_lo * v_hi; + const std::uint64_t p2 = u_hi * v_lo; + const std::uint64_t p3 = u_hi * v_hi; + + const std::uint64_t p0_hi = p0 >> 32u; + const std::uint64_t p1_lo = p1 & 0xFFFFFFFFu; + const std::uint64_t p1_hi = p1 >> 32u; + const std::uint64_t p2_lo = p2 & 0xFFFFFFFFu; + const std::uint64_t p2_hi = p2 >> 32u; + + std::uint64_t Q = p0_hi + p1_lo + p2_lo; + + // The full product might now be computed as + // + // p_hi = p3 + p2_hi + p1_hi + (Q >> 32) + // p_lo = p0_lo + (Q << 32) + // + // But in this particular case here, the full p_lo is not required. + // Effectively we only need to add the highest bit in p_lo to p_hi (and + // Q_hi + 1 does not overflow). + + Q += std::uint64_t{1} << (64u - 32u - 1u); // round, ties up + + const std::uint64_t h = p3 + p2_hi + p1_hi + (Q >> 32u); + + return {h, x.e + y.e + 64}; + } + + /*! 
+ @brief normalize x such that the significand is >= 2^(q-1) + @pre x.f != 0 + */ + static diyfp normalize(diyfp x) noexcept { + + while ((x.f >> 63u) == 0) { + x.f <<= 1u; + x.e--; + } + + return x; + } + + /*! + @brief normalize x such that the result has the exponent E + @pre e >= x.e and the upper e - x.e bits of x.f must be zero. + */ + static diyfp normalize_to(const diyfp &x, + const int target_exponent) noexcept { + const int delta = x.e - target_exponent; + + return {x.f << delta, target_exponent}; + } +}; + +struct boundaries { + diyfp w; + diyfp minus; + diyfp plus; +}; + +/*! +Compute the (normalized) diyfp representing the input number 'value' and its +boundaries. +@pre value must be finite and positive +*/ +template boundaries compute_boundaries(FloatType value) { + + // Convert the IEEE representation into a diyfp. + // + // If v is denormal: + // value = 0.F * 2^(1 - bias) = ( F) * 2^(1 - bias - (p-1)) + // If v is normalized: + // value = 1.F * 2^(E - bias) = (2^(p-1) + F) * 2^(E - bias - (p-1)) + + static_assert(std::numeric_limits::is_iec559, + "internal error: dtoa_short requires an IEEE-754 " + "floating-point implementation"); + + constexpr int kPrecision = + std::numeric_limits::digits; // = p (includes the hidden bit) + constexpr int kBias = + std::numeric_limits::max_exponent - 1 + (kPrecision - 1); + constexpr int kMinExp = 1 - kBias; + constexpr std::uint64_t kHiddenBit = std::uint64_t{1} + << (kPrecision - 1); // = 2^(p-1) + + using bits_type = typename std::conditional::type; + + const std::uint64_t bits = reinterpret_bits(value); + const std::uint64_t E = bits >> (kPrecision - 1); + const std::uint64_t F = bits & (kHiddenBit - 1); + + const bool is_denormal = E == 0; + const diyfp v = is_denormal + ? diyfp(F, kMinExp) + : diyfp(F + kHiddenBit, static_cast(E) - kBias); + + // Compute the boundaries m- and m+ of the floating-point value + // v = f * 2^e. 
+ // + // Determine v- and v+, the floating-point predecessor and successor if v, + // respectively. + // + // v- = v - 2^e if f != 2^(p-1) or e == e_min (A) + // = v - 2^(e-1) if f == 2^(p-1) and e > e_min (B) + // + // v+ = v + 2^e + // + // Let m- = (v- + v) / 2 and m+ = (v + v+) / 2. All real numbers _strictly_ + // between m- and m+ round to v, regardless of how the input rounding + // algorithm breaks ties. + // + // ---+-------------+-------------+-------------+-------------+--- (A) + // v- m- v m+ v+ + // + // -----------------+------+------+-------------+-------------+--- (B) + // v- m- v m+ v+ + + const bool lower_boundary_is_closer = F == 0 && E > 1; + const diyfp m_plus = diyfp(2 * v.f + 1, v.e - 1); + const diyfp m_minus = lower_boundary_is_closer + ? diyfp(4 * v.f - 1, v.e - 2) // (B) + : diyfp(2 * v.f - 1, v.e - 1); // (A) + + // Determine the normalized w+ = m+. + const diyfp w_plus = diyfp::normalize(m_plus); + + // Determine w- = m- such that e_(w-) = e_(w+). + const diyfp w_minus = diyfp::normalize_to(m_minus, w_plus.e); + + return {diyfp::normalize(v), w_minus, w_plus}; +} + +// Given normalized diyfp w, Grisu needs to find a (normalized) cached +// power-of-ten c, such that the exponent of the product c * w = f * 2^e lies +// within a certain range [alpha, gamma] (Definition 3.2 from [1]) +// +// alpha <= e = e_c + e_w + q <= gamma +// +// or +// +// f_c * f_w * 2^alpha <= f_c 2^(e_c) * f_w 2^(e_w) * 2^q +// <= f_c * f_w * 2^gamma +// +// Since c and w are normalized, i.e. 2^(q-1) <= f < 2^q, this implies +// +// 2^(q-1) * 2^(q-1) * 2^alpha <= c * w * 2^q < 2^q * 2^q * 2^gamma +// +// or +// +// 2^(q - 2 + alpha) <= c * w < 2^(q + gamma) +// +// The choice of (alpha,gamma) determines the size of the table and the form of +// the digit generation procedure. 
// Using (alpha,gamma)=(-60,-32) works out well
// in practice:
//
// The idea is to cut the number c * w = f * 2^e into two parts, which can be
// processed independently: An integral part p1, and a fractional part p2:
//
//      f * 2^e = ( (f div 2^-e) * 2^-e + (f mod 2^-e) ) * 2^e
//              = (f div 2^-e) + (f mod 2^-e) * 2^e
//              = p1 + p2 * 2^e
//
// The conversion of p1 into decimal form requires a series of divisions and
// modulos by (a power of) 10. These operations are faster for 32-bit than for
// 64-bit integers, so p1 should ideally fit into a 32-bit integer. This can be
// achieved by choosing
//
//      -e >= 32   or   e <= -32 := gamma
//
// In order to convert the fractional part
//
//      p2 * 2^e = p2 / 2^-e = d[-1] / 10^1 + d[-2] / 10^2 + ...
//
// into decimal form, the fraction is repeatedly multiplied by 10 and the digits
// d[-i] are extracted in order:
//
//      (10 * p2) div 2^-e = d[-1]
//      (10 * p2) mod 2^-e = d[-2] / 10^1 + ...
//
// The multiplication by 10 must not overflow. It is sufficient to choose
//
//      10 * p2 < 16 * p2 = 2^4 * p2 <= 2^64.
//
// Since p2 = f mod 2^-e < 2^-e,
//
//      -e <= 60   or   e >= -60 := alpha

constexpr int kAlpha = -60;
constexpr int kGamma = -32;

/*!
@brief a precomputed, normalized approximation of a power of ten
*/
struct cached_power // c = f * 2^e ~= 10^k
{
  std::uint64_t f;
  int e;
  int k;
};

/*!
For a normalized diyfp w = f * 2^e, this function returns a (normalized) cached
power-of-ten c = f_c * 2^e_c, such that the exponent of the product w * c
satisfies (Definition 3.2 from [1])
     alpha <= e_c + e + q <= gamma.

@param e binary exponent of the normalized diyfp
@return the table entry {f_c, e_c, k} with c ~= 10^k
@pre e must lie in the exponent range produced by normalized doubles
     ([-1137, 960], see below); the table index is not range-checked.
*/
inline cached_power get_cached_power_for_binary_exponent(int e) {
  // Now
  //
  //      alpha <= e_c + e + q <= gamma                                    (1)
  //      ==> f_c * 2^alpha <= c * 2^e * 2^q
  //
  // and since the c's are normalized, 2^(q-1) <= f_c,
  //
  //      ==> 2^(q - 1 + alpha) <= c * 2^(e + q)
  //      ==> 2^(alpha - e - 1) <= c
  //
  // If c were an exact power of ten, i.e. c = 10^k, one may determine k as
  //
  //      k = ceil( log_10( 2^(alpha - e - 1) ) )
  //        = ceil( (alpha - e - 1) * log_10(2) )
  //
  // From the paper:
  // "In theory the result of the procedure could be wrong since c is rounded,
  //  and the computation itself is approximated [...]. In practice, however,
  //  this simple function is sufficient."
  //
  // For IEEE double precision floating-point numbers converted into
  // normalized diyfp's w = f * 2^e, with q = 64,
  //
  //      e >= -1022      (min IEEE exponent)
  //           -52        (p - 1)
  //           -52        (p - 1, possibly normalize denormal IEEE numbers)
  //           -11        (normalize the diyfp)
  //         = -1137
  //
  // and
  //
  //      e <= +1023      (max IEEE exponent)
  //           -52        (p - 1)
  //           -11        (normalize the diyfp)
  //         = 960
  //
  // This binary exponent range [-1137,960] results in a decimal exponent
  // range [-307,324]. One does not need to store a cached power for each
  // k in this range. For each such k it suffices to find a cached power
  // such that the exponent of the product lies in [alpha,gamma].
  // This implies that the difference of the decimal exponents of adjacent
  // table entries must be less than or equal to
  //
  //      floor( (gamma - alpha) * log_10(2) ) = 8.
  //
  // (A smaller distance gamma-alpha would require a larger table.)

  // NB:
  // Actually this function returns c, such that -60 <= e_c + e + 64 <= -34.

  constexpr int kCachedPowersMinDecExp = -300;
  constexpr int kCachedPowersDecStep = 8;

  // 79 entries: k = -300, -292, ..., 324 (step kCachedPowersDecStep).
  static constexpr std::array<cached_power, 79> kCachedPowers = {{
      {0xAB70FE17C79AC6CA, -1060, -300}, {0xFF77B1FCBEBCDC4F, -1034, -292},
      {0xBE5691EF416BD60C, -1007, -284}, {0x8DD01FAD907FFC3C, -980, -276},
      {0xD3515C2831559A83, -954, -268},  {0x9D71AC8FADA6C9B5, -927, -260},
      {0xEA9C227723EE8BCB, -901, -252},  {0xAECC49914078536D, -874, -244},
      {0x823C12795DB6CE57, -847, -236},  {0xC21094364DFB5637, -821, -228},
      {0x9096EA6F3848984F, -794, -220},  {0xD77485CB25823AC7, -768, -212},
      {0xA086CFCD97BF97F4, -741, -204},  {0xEF340A98172AACE5, -715, -196},
      {0xB23867FB2A35B28E, -688, -188},  {0x84C8D4DFD2C63F3B, -661, -180},
      {0xC5DD44271AD3CDBA, -635, -172},  {0x936B9FCEBB25C996, -608, -164},
      {0xDBAC6C247D62A584, -582, -156},  {0xA3AB66580D5FDAF6, -555, -148},
      {0xF3E2F893DEC3F126, -529, -140},  {0xB5B5ADA8AAFF80B8, -502, -132},
      {0x87625F056C7C4A8B, -475, -124},  {0xC9BCFF6034C13053, -449, -116},
      {0x964E858C91BA2655, -422, -108},  {0xDFF9772470297EBD, -396, -100},
      {0xA6DFBD9FB8E5B88F, -369, -92},   {0xF8A95FCF88747D94, -343, -84},
      {0xB94470938FA89BCF, -316, -76},   {0x8A08F0F8BF0F156B, -289, -68},
      {0xCDB02555653131B6, -263, -60},   {0x993FE2C6D07B7FAC, -236, -52},
      {0xE45C10C42A2B3B06, -210, -44},   {0xAA242499697392D3, -183, -36},
      {0xFD87B5F28300CA0E, -157, -28},   {0xBCE5086492111AEB, -130, -20},
      {0x8CBCCC096F5088CC, -103, -12},   {0xD1B71758E219652C, -77, -4},
      {0x9C40000000000000, -50, 4},      {0xE8D4A51000000000, -24, 12},
      {0xAD78EBC5AC620000, 3, 20},       {0x813F3978F8940984, 30, 28},
      {0xC097CE7BC90715B3, 56, 36},      {0x8F7E32CE7BEA5C70, 83, 44},
      {0xD5D238A4ABE98068, 109, 52},     {0x9F4F2726179A2245, 136, 60},
      {0xED63A231D4C4FB27, 162, 68},     {0xB0DE65388CC8ADA8, 189, 76},
      {0x83C7088E1AAB65DB, 216, 84},     {0xC45D1DF942711D9A, 242, 92},
      {0x924D692CA61BE758, 269, 100},    {0xDA01EE641A708DEA, 295, 108},
      {0xA26DA3999AEF774A, 322, 116},    {0xF209787BB47D6B85, 348, 124},
      {0xB454E4A179DD1877, 375, 132},    {0x865B86925B9BC5C2, 402, 140},
      {0xC83553C5C8965D3D, 428, 148},    {0x952AB45CFA97A0B3, 455, 156},
      {0xDE469FBD99A05FE3, 481, 164},    {0xA59BC234DB398C25, 508, 172},
      {0xF6C69A72A3989F5C, 534, 180},    {0xB7DCBF5354E9BECE, 561, 188},
      {0x88FCF317F22241E2, 588, 196},    {0xCC20CE9BD35C78A5, 614, 204},
      {0x98165AF37B2153DF, 641, 212},    {0xE2A0B5DC971F303A, 667, 220},
      {0xA8D9D1535CE3B396, 694, 228},    {0xFB9B7CD9A4A7443C, 720, 236},
      {0xBB764C4CA7A44410, 747, 244},    {0x8BAB8EEFB6409C1A, 774, 252},
      {0xD01FEF10A657842C, 800, 260},    {0x9B10A4E5E9913129, 827, 268},
      {0xE7109BFBA19C0C9D, 853, 276},    {0xAC2820D9623BF429, 880, 284},
      {0x80444B5E7AA7CF85, 907, 292},    {0xBF21E44003ACDD2D, 933, 300},
      {0x8E679C2F5E44FF8F, 960, 308},    {0xD433179D9C8CB841, 986, 316},
      {0x9E19DB92B4E31BA9, 1013, 324},
  }};

  // This computation gives exactly the same results for k as
  //      k = ceil((kAlpha - e - 1) * 0.30102999566398114)
  // for |e| <= 1500, but doesn't require floating-point operations.
  // NB: log_10(2) ~= 78913 / 2^18
  const int f = kAlpha - e - 1;
  // Integer division truncates toward zero, so add 1 for positive f to get
  // the ceiling.
  const int k = (f * 78913) / (1 << 18) + static_cast<int>(f > 0);

  // Round k up to the next multiple of the table step and convert it into a
  // zero-based table position.
  const int index = (-kCachedPowersMinDecExp + k + (kCachedPowersDecStep - 1)) /
                    kCachedPowersDecStep;

  const cached_power cached = kCachedPowers[static_cast<std::size_t>(index)];

  return cached;
}

/*!
For n != 0, returns k, such that pow10 := 10^(k-1) <= n < 10^k.
For n == 0, returns 1 and sets pow10 := 1.
*/
/*!
@brief finds the largest power of ten that does not exceed n

For n != 0, returns k, such that pow10 := 10^(k-1) <= n < 10^k.
For n == 0, returns 1 and sets pow10 := 1.

@param[in]  n     value to inspect
@param[out] pow10 receives 10^(k-1)
@return k, the number of decimal digits of n (1 for n == 0)
*/
inline int find_largest_pow10(const std::uint32_t n, std::uint32_t &pow10) {
  // LCOV_EXCL_START
  if (n >= 1000000000) {
    pow10 = 1000000000;
    return 10;
  }
  // LCOV_EXCL_STOP
  else if (n >= 100000000) {
    pow10 = 100000000;
    return 9;
  } else if (n >= 10000000) {
    pow10 = 10000000;
    return 8;
  } else if (n >= 1000000) {
    pow10 = 1000000;
    return 7;
  } else if (n >= 100000) {
    pow10 = 100000;
    return 6;
  } else if (n >= 10000) {
    pow10 = 10000;
    return 5;
  } else if (n >= 1000) {
    pow10 = 1000;
    return 4;
  } else if (n >= 100) {
    pow10 = 100;
    return 3;
  } else if (n >= 10) {
    pow10 = 10;
    return 2;
  } else {
    pow10 = 1;
    return 1;
  }
}

/*!
@brief decrements the last generated digit while that moves V closer to w

@param buf   digit buffer; only buf[len - 1] is modified
@param len   number of digits currently in buf
@param dist  M+ - w
@param delta M+ - M-
@param rest  M+ - V
@param ten_k one unit in the last decimal place of buf
@note dist, delta, rest and ten_k are significands sharing one binary exponent.
*/
inline void grisu2_round(char *buf, int len, std::uint64_t dist,
                         std::uint64_t delta, std::uint64_t rest,
                         std::uint64_t ten_k) {

  //               <--------------------------- delta ---->
  //                                  <---- dist --------->
  // --------------[------------------+-------------------]--------------
  //               M-                 w                   M+
  //
  //                                   ten_k
  //                                 <------>
  //                                       <---- rest ---->
  // --------------[------------------+----+--------------]--------------
  //                                  w    V
  //                                       = buf * 10^k
  //
  // ten_k represents a unit-in-the-last-place in the decimal representation
  // stored in buf.
  // Decrement buf by ten_k while this takes buf closer to w.

  // The tests are written in this order to avoid overflow in unsigned
  // integer arithmetic.

  while (rest < dist && delta - rest >= ten_k &&
         (rest + ten_k < dist || dist - rest > rest + ten_k - dist)) {
    buf[len - 1]--;
    rest += ten_k;
  }
}

/*!
Generates V = buffer * 10^decimal_exponent, such that M- <= V <= M+.
M- and M+ must be normalized and share the same exponent -60 <= e <= -32.
*/
+*/ +inline void grisu2_digit_gen(char *buffer, int &length, int &decimal_exponent, + diyfp M_minus, diyfp w, diyfp M_plus) { + static_assert(kAlpha >= -60, "internal error"); + static_assert(kGamma <= -32, "internal error"); + + // Generates the digits (and the exponent) of a decimal floating-point + // number V = buffer * 10^decimal_exponent in the range [M-, M+]. The diyfp's + // w, M- and M+ share the same exponent e, which satisfies alpha <= e <= + // gamma. + // + // <--------------------------- delta ----> + // <---- dist ---------> + // --------------[------------------+-------------------]-------------- + // M- w M+ + // + // Grisu2 generates the digits of M+ from left to right and stops as soon as + // V is in [M-,M+]. + + std::uint64_t delta = + diyfp::sub(M_plus, M_minus) + .f; // (significand of (M+ - M-), implicit exponent is e) + std::uint64_t dist = + diyfp::sub(M_plus, w) + .f; // (significand of (M+ - w ), implicit exponent is e) + + // Split M+ = f * 2^e into two parts p1 and p2 (note: e < 0): + // + // M+ = f * 2^e + // = ((f div 2^-e) * 2^-e + (f mod 2^-e)) * 2^e + // = ((p1 ) * 2^-e + (p2 )) * 2^e + // = p1 + p2 * 2^e + + const diyfp one(std::uint64_t{1} << -M_plus.e, M_plus.e); + + auto p1 = static_cast( + M_plus.f >> + -one.e); // p1 = f div 2^-e (Since -e >= 32, p1 fits into a 32-bit int.) 
+ std::uint64_t p2 = M_plus.f & (one.f - 1); // p2 = f mod 2^-e + + // 1) + // + // Generate the digits of the integral part p1 = d[n-1]...d[1]d[0] + + std::uint32_t pow10; + const int k = find_largest_pow10(p1, pow10); + + // 10^(k-1) <= p1 < 10^k, pow10 = 10^(k-1) + // + // p1 = (p1 div 10^(k-1)) * 10^(k-1) + (p1 mod 10^(k-1)) + // = (d[k-1] ) * 10^(k-1) + (p1 mod 10^(k-1)) + // + // M+ = p1 + p2 * 2^e + // = d[k-1] * 10^(k-1) + (p1 mod 10^(k-1)) + p2 * 2^e + // = d[k-1] * 10^(k-1) + ((p1 mod 10^(k-1)) * 2^-e + p2) * 2^e + // = d[k-1] * 10^(k-1) + ( rest) * 2^e + // + // Now generate the digits d[n] of p1 from left to right (n = k-1,...,0) + // + // p1 = d[k-1]...d[n] * 10^n + d[n-1]...d[0] + // + // but stop as soon as + // + // rest * 2^e = (d[n-1]...d[0] * 2^-e + p2) * 2^e <= delta * 2^e + + int n = k; + while (n > 0) { + // Invariants: + // M+ = buffer * 10^n + (p1 + p2 * 2^e) (buffer = 0 for n = k) + // pow10 = 10^(n-1) <= p1 < 10^n + // + const std::uint32_t d = p1 / pow10; // d = p1 div 10^(n-1) + const std::uint32_t r = p1 % pow10; // r = p1 mod 10^(n-1) + // + // M+ = buffer * 10^n + (d * 10^(n-1) + r) + p2 * 2^e + // = (buffer * 10 + d) * 10^(n-1) + (r + p2 * 2^e) + // + buffer[length++] = static_cast('0' + d); // buffer := buffer * 10 + d + // + // M+ = buffer * 10^(n-1) + (r + p2 * 2^e) + // + p1 = r; + n--; + // + // M+ = buffer * 10^n + (p1 + p2 * 2^e) + // pow10 = 10^n + // + + // Now check if enough digits have been generated. + // Compute + // + // p1 + p2 * 2^e = (p1 * 2^-e + p2) * 2^e = rest * 2^e + // + // Note: + // Since rest and delta share the same exponent e, it suffices to + // compare the significands. + const std::uint64_t rest = (std::uint64_t{p1} << -one.e) + p2; + if (rest <= delta) { + // V = buffer * 10^n, with M- <= V <= M+. + + decimal_exponent += n; + + // We may now just stop. But instead look if the buffer could be + // decremented to bring V closer to w. + // + // pow10 = 10^n is now 1 ulp in the decimal representation V. 
+ // The rounding procedure works with diyfp's with an implicit + // exponent of e. + // + // 10^n = (10^n * 2^-e) * 2^e = ulp * 2^e + // + const std::uint64_t ten_n = std::uint64_t{pow10} << -one.e; + grisu2_round(buffer, length, dist, delta, rest, ten_n); + + return; + } + + pow10 /= 10; + // + // pow10 = 10^(n-1) <= p1 < 10^n + // Invariants restored. + } + + // 2) + // + // The digits of the integral part have been generated: + // + // M+ = d[k-1]...d[1]d[0] + p2 * 2^e + // = buffer + p2 * 2^e + // + // Now generate the digits of the fractional part p2 * 2^e. + // + // Note: + // No decimal point is generated: the exponent is adjusted instead. + // + // p2 actually represents the fraction + // + // p2 * 2^e + // = p2 / 2^-e + // = d[-1] / 10^1 + d[-2] / 10^2 + ... + // + // Now generate the digits d[-m] of p1 from left to right (m = 1,2,...) + // + // p2 * 2^e = d[-1]d[-2]...d[-m] * 10^-m + // + 10^-m * (d[-m-1] / 10^1 + d[-m-2] / 10^2 + ...) + // + // using + // + // 10^m * p2 = ((10^m * p2) div 2^-e) * 2^-e + ((10^m * p2) mod 2^-e) + // = ( d) * 2^-e + ( r) + // + // or + // 10^m * p2 * 2^e = d + r * 2^e + // + // i.e. + // + // M+ = buffer + p2 * 2^e + // = buffer + 10^-m * (d + r * 2^e) + // = (buffer * 10^m + d) * 10^-m + 10^-m * r * 2^e + // + // and stop as soon as 10^-m * r * 2^e <= delta * 2^e + + int m = 0; + for (;;) { + // Invariant: + // M+ = buffer * 10^-m + 10^-m * (d[-m-1] / 10 + d[-m-2] / 10^2 + ...) 
+ // * 2^e + // = buffer * 10^-m + 10^-m * (p2 ) + // * 2^e = buffer * 10^-m + 10^-m * (1/10 * (10 * p2) ) * 2^e = + // buffer * 10^-m + 10^-m * (1/10 * ((10*p2 div 2^-e) * 2^-e + + // (10*p2 mod 2^-e)) * 2^e + // + p2 *= 10; + const std::uint64_t d = p2 >> -one.e; // d = (10 * p2) div 2^-e + const std::uint64_t r = p2 & (one.f - 1); // r = (10 * p2) mod 2^-e + // + // M+ = buffer * 10^-m + 10^-m * (1/10 * (d * 2^-e + r) * 2^e + // = buffer * 10^-m + 10^-m * (1/10 * (d + r * 2^e)) + // = (buffer * 10 + d) * 10^(-m-1) + 10^(-m-1) * r * 2^e + // + buffer[length++] = static_cast('0' + d); // buffer := buffer * 10 + d + // + // M+ = buffer * 10^(-m-1) + 10^(-m-1) * r * 2^e + // + p2 = r; + m++; + // + // M+ = buffer * 10^-m + 10^-m * p2 * 2^e + // Invariant restored. + + // Check if enough digits have been generated. + // + // 10^-m * p2 * 2^e <= delta * 2^e + // p2 * 2^e <= 10^m * delta * 2^e + // p2 <= 10^m * delta + delta *= 10; + dist *= 10; + if (p2 <= delta) { + break; + } + } + + // V = buffer * 10^-m, with M- <= V <= M+. + + decimal_exponent -= m; + + // 1 ulp in the decimal representation is now 10^-m. + // Since delta and dist are now scaled by 10^m, we need to do the + // same with ulp in order to keep the units in sync. + // + // 10^m * 10^-m = 1 = 2^-e * 2^e = ten_m * 2^e + // + const std::uint64_t ten_m = one.f; + grisu2_round(buffer, length, dist, delta, p2, ten_m); + + // By construction this algorithm generates the shortest possible decimal + // number (Loitsch, Theorem 6.2) which rounds back to w. + // For an input number of precision p, at least + // + // N = 1 + ceil(p * log_10(2)) + // + // decimal digits are sufficient to identify all binary floating-point + // numbers (Matula, "In-and-Out conversions"). + // This implies that the algorithm does not produce more than N decimal + // digits. + // + // N = 17 for p = 53 (IEEE double precision) + // N = 9 for p = 24 (IEEE single precision) +} + +/*! 
+v = buf * 10^decimal_exponent +len is the length of the buffer (number of decimal digits) +The buffer must be large enough, i.e. >= max_digits10. +*/ +inline void grisu2(char *buf, int &len, int &decimal_exponent, diyfp m_minus, + diyfp v, diyfp m_plus) { + + // --------(-----------------------+-----------------------)-------- (A) + // m- v m+ + // + // --------------------(-----------+-----------------------)-------- (B) + // m- v m+ + // + // First scale v (and m- and m+) such that the exponent is in the range + // [alpha, gamma]. + + const cached_power cached = get_cached_power_for_binary_exponent(m_plus.e); + + const diyfp c_minus_k(cached.f, cached.e); // = c ~= 10^-k + + // The exponent of the products is = v.e + c_minus_k.e + q and is in the range + // [alpha,gamma] + const diyfp w = diyfp::mul(v, c_minus_k); + const diyfp w_minus = diyfp::mul(m_minus, c_minus_k); + const diyfp w_plus = diyfp::mul(m_plus, c_minus_k); + + // ----(---+---)---------------(---+---)---------------(---+---)---- + // w- w w+ + // = c*m- = c*v = c*m+ + // + // diyfp::mul rounds its result and c_minus_k is approximated too. w, w- and + // w+ are now off by a small amount. + // In fact: + // + // w - v * 10^k < 1 ulp + // + // To account for this inaccuracy, add resp. subtract 1 ulp. + // + // --------+---[---------------(---+---)---------------]---+-------- + // w- M- w M+ w+ + // + // Now any number in [M-, M+] (bounds included) will round to w when input, + // regardless of how the input rounding algorithm breaks ties. + // + // And digit_gen generates the shortest possible such number in [M-, M+]. + // Note that this does not mean that Grisu2 always generates the shortest + // possible number in the interval (m-, m+). + const diyfp M_minus(w_minus.f + 1, w_minus.e); + const diyfp M_plus(w_plus.f - 1, w_plus.e); + + decimal_exponent = -cached.k; // = -(-k) = k + + grisu2_digit_gen(buf, len, decimal_exponent, M_minus, w, M_plus); +} + +/*! 
+v = buf * 10^decimal_exponent +len is the length of the buffer (number of decimal digits) +The buffer must be large enough, i.e. >= max_digits10. +*/ +template +void grisu2(char *buf, int &len, int &decimal_exponent, FloatType value) { + static_assert(diyfp::kPrecision >= std::numeric_limits::digits + 3, + "internal error: not enough precision"); + + // If the neighbors (and boundaries) of 'value' are always computed for + // double-precision numbers, all float's can be recovered using strtod (and + // strtof). However, the resulting decimal representations are not exactly + // "short". + // + // The documentation for 'std::to_chars' + // (https://en.cppreference.com/w/cpp/utility/to_chars) says "value is + // converted to a string as if by std::sprintf in the default ("C") locale" + // and since sprintf promotes float's to double's, I think this is exactly + // what 'std::to_chars' does. On the other hand, the documentation for + // 'std::to_chars' requires that "parsing the representation using the + // corresponding std::from_chars function recovers value exactly". That + // indicates that single precision floating-point numbers should be recovered + // using 'std::strtof'. + // + // NB: If the neighbors are computed for single-precision numbers, there is a + // single float + // (7.0385307e-26f) which can't be recovered using strtod. The resulting + // double precision value is off by 1 ulp. +#if 0 + const boundaries w = compute_boundaries(static_cast(value)); +#else + const boundaries w = compute_boundaries(value); +#endif + + grisu2(buf, len, decimal_exponent, w.minus, w.w, w.plus); +} + +/*! +@brief appends a decimal representation of e to buf +@return a pointer to the element following the exponent. +@pre -1000 < e < 1000 +*/ +inline char *append_exponent(char *buf, int e) { + + if (e < 0) { + e = -e; + *buf++ = '-'; + } else { + *buf++ = '+'; + } + + auto k = static_cast(e); + if (k < 10) { + // Always print at least two digits in the exponent. 
+ // This is for compatibility with printf("%g"). + *buf++ = '0'; + *buf++ = static_cast('0' + k); + } else if (k < 100) { + *buf++ = static_cast('0' + k / 10); + k %= 10; + *buf++ = static_cast('0' + k); + } else { + *buf++ = static_cast('0' + k / 100); + k %= 100; + *buf++ = static_cast('0' + k / 10); + k %= 10; + *buf++ = static_cast('0' + k); + } + + return buf; +} + +/*! +@brief prettify v = buf * 10^decimal_exponent +If v is in the range [10^min_exp, 10^max_exp) it will be printed in fixed-point +notation. Otherwise it will be printed in exponential notation. +@pre min_exp < 0 +@pre max_exp > 0 +*/ +inline char *format_buffer(char *buf, int len, int decimal_exponent, + int min_exp, int max_exp) { + + const int k = len; + const int n = len + decimal_exponent; + + // v = buf * 10^(n-k) + // k is the length of the buffer (number of decimal digits) + // n is the position of the decimal point relative to the start of the buffer. + + if (k <= n && n <= max_exp) { + // digits[000] + // len <= max_exp + 2 + + std::memset(buf + k, '0', static_cast(n) - static_cast(k)); + // Make it look like a floating-point number (#362, #378) + buf[n + 0] = '.'; + buf[n + 1] = '0'; + return buf + (static_cast(n)) + 2; + } + + if (0 < n && n <= max_exp) { + // dig.its + // len <= max_digits10 + 1 + std::memmove(buf + (static_cast(n) + 1), buf + n, + static_cast(k) - static_cast(n)); + buf[n] = '.'; + return buf + (static_cast(k) + 1U); + } + + if (min_exp < n && n <= 0) { + // 0.[000]digits + // len <= 2 + (-min_exp - 1) + max_digits10 + + std::memmove(buf + (2 + static_cast(-n)), buf, + static_cast(k)); + buf[0] = '0'; + buf[1] = '.'; + std::memset(buf + 2, '0', static_cast(-n)); + return buf + (2U + static_cast(-n) + static_cast(k)); + } + + if (k == 1) { + // dE+123 + // len <= 1 + 5 + + buf += 1; + } else { + // d.igitsE+123 + // len <= max_digits10 + 1 + 5 + + std::memmove(buf + 2, buf + 1, static_cast(k) - 1); + buf[1] = '.'; + buf += 1 + static_cast(k); + } + + *buf++ = 'e'; + 
return append_exponent(buf, n - 1); +} + +} // namespace dtoa_impl + +/*! +The format of the resulting decimal representation is similar to printf's %g +format. Returns an iterator pointing past-the-end of the decimal representation. +@note The input number must be finite, i.e. NaN's and Inf's are not supported. +@note The buffer must be large enough. +@note The result is NOT null-terminated. +*/ +char *to_chars(char *first, const char *last, double value) { + static_cast(last); // maybe unused - fix warning + bool negative = std::signbit(value); + if (negative) { + value = -value; + *first++ = '-'; + } + + if (value == 0) // +-0 + { + *first++ = '0'; + // Make it look like a floating-point number (#362, #378) + *first++ = '.'; + *first++ = '0'; + return first; + } + // Compute v = buffer * 10^decimal_exponent. + // The decimal digits are stored in the buffer, which needs to be interpreted + // as an unsigned decimal integer. + // len is the length of the buffer, i.e. the number of decimal digits. + int len = 0; + int decimal_exponent = 0; + dtoa_impl::grisu2(first, len, decimal_exponent, value); + // Format the buffer like printf("%.*g", prec, value) + constexpr int kMinExp = -4; + constexpr int kMaxExp = std::numeric_limits::digits10; + + return dtoa_impl::format_buffer(first, len, decimal_exponent, kMinExp, + kMaxExp); +} +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_SRC_TO_CHARS_CPP +/* end file to_chars.cpp */ +/* including from_chars.cpp: #include */ +/* begin file from_chars.cpp */ +#ifndef SIMDJSON_SRC_FROM_CHARS_CPP +#define SIMDJSON_SRC_FROM_CHARS_CPP + +/* skipped duplicate #include */ + +#include +#include +#include + +namespace simdjson { +namespace internal { + +/** + * The code in the internal::from_chars function is meant to handle the floating-point number parsing + * when we have more than 19 digits in the decimal mantissa. 
This should only be seen + * in adversarial scenarios: we do not expect production systems to even produce + * such floating-point numbers. + * + * The parser is based on work by Nigel Tao (at https://github.com/google/wuffs/) + * who credits Ken Thompson for the design (via a reference to the Go source + * code). See + * https://github.com/google/wuffs/blob/aa46859ea40c72516deffa1b146121952d6dfd3b/internal/cgen/base/floatconv-submodule-data.c + * https://github.com/google/wuffs/blob/46cd8105f47ca07ae2ba8e6a7818ef9c0df6c152/internal/cgen/base/floatconv-submodule-code.c + * It is probably not very fast but it is a fallback that should almost never be + * called in real life. Google Wuffs is published under APL 2.0. + **/ + +namespace { +constexpr uint32_t max_digits = 768; +constexpr int32_t decimal_point_range = 2047; +} // namespace + +struct adjusted_mantissa { + uint64_t mantissa; + int power2; + adjusted_mantissa() : mantissa(0), power2(0) {} +}; + +struct decimal { + uint32_t num_digits; + int32_t decimal_point; + bool negative; + bool truncated; + uint8_t digits[max_digits]; +}; + +template struct binary_format { + static constexpr int mantissa_explicit_bits(); + static constexpr int minimum_exponent(); + static constexpr int infinite_power(); + static constexpr int sign_index(); +}; + +template <> constexpr int binary_format::mantissa_explicit_bits() { + return 52; +} + +template <> constexpr int binary_format::minimum_exponent() { + return -1023; +} +template <> constexpr int binary_format::infinite_power() { + return 0x7FF; +} + +template <> constexpr int binary_format::sign_index() { return 63; } + +bool is_integer(char c) noexcept { return (c >= '0' && c <= '9'); } + +// This should always succeed since it follows a call to parse_number. 
+decimal parse_decimal(const char *&p) noexcept { + decimal answer; + answer.num_digits = 0; + answer.decimal_point = 0; + answer.truncated = false; + answer.negative = (*p == '-'); + if ((*p == '-') || (*p == '+')) { + ++p; + } + + while (*p == '0') { + ++p; + } + while (is_integer(*p)) { + if (answer.num_digits < max_digits) { + answer.digits[answer.num_digits] = uint8_t(*p - '0'); + } + answer.num_digits++; + ++p; + } + if (*p == '.') { + ++p; + const char *first_after_period = p; + // if we have not yet encountered a zero, we have to skip it as well + if (answer.num_digits == 0) { + // skip zeros + while (*p == '0') { + ++p; + } + } + while (is_integer(*p)) { + if (answer.num_digits < max_digits) { + answer.digits[answer.num_digits] = uint8_t(*p - '0'); + } + answer.num_digits++; + ++p; + } + answer.decimal_point = int32_t(first_after_period - p); + } + if(answer.num_digits > 0) { + const char *preverse = p - 1; + int32_t trailing_zeros = 0; + while ((*preverse == '0') || (*preverse == '.')) { + if(*preverse == '0') { trailing_zeros++; }; + --preverse; + } + answer.decimal_point += int32_t(answer.num_digits); + answer.num_digits -= uint32_t(trailing_zeros); + } + if(answer.num_digits > max_digits ) { + answer.num_digits = max_digits; + answer.truncated = true; + } + if (('e' == *p) || ('E' == *p)) { + ++p; + bool neg_exp = false; + if ('-' == *p) { + neg_exp = true; + ++p; + } else if ('+' == *p) { + ++p; + } + int32_t exp_number = 0; // exponential part + while (is_integer(*p)) { + uint8_t digit = uint8_t(*p - '0'); + if (exp_number < 0x10000) { + exp_number = 10 * exp_number + digit; + } + ++p; + } + answer.decimal_point += (neg_exp ? -exp_number : exp_number); + } + return answer; +} + +// This should always succeed since it follows a call to parse_number. +// Will not read at or beyond the "end" pointer. 
+decimal parse_decimal(const char *&p, const char * end) noexcept { + decimal answer; + answer.num_digits = 0; + answer.decimal_point = 0; + answer.truncated = false; + if(p == end) { return answer; } // should never happen + answer.negative = (*p == '-'); + if ((*p == '-') || (*p == '+')) { + ++p; + } + + while ((p != end) && (*p == '0')) { + ++p; + } + while ((p != end) && is_integer(*p)) { + if (answer.num_digits < max_digits) { + answer.digits[answer.num_digits] = uint8_t(*p - '0'); + } + answer.num_digits++; + ++p; + } + if ((p != end) && (*p == '.')) { + ++p; + if(p == end) { return answer; } // should never happen + const char *first_after_period = p; + // if we have not yet encountered a zero, we have to skip it as well + if (answer.num_digits == 0) { + // skip zeros + while (*p == '0') { + ++p; + } + } + while ((p != end) && is_integer(*p)) { + if (answer.num_digits < max_digits) { + answer.digits[answer.num_digits] = uint8_t(*p - '0'); + } + answer.num_digits++; + ++p; + } + answer.decimal_point = int32_t(first_after_period - p); + } + if(answer.num_digits > 0) { + const char *preverse = p - 1; + int32_t trailing_zeros = 0; + while ((*preverse == '0') || (*preverse == '.')) { + if(*preverse == '0') { trailing_zeros++; }; + --preverse; + } + answer.decimal_point += int32_t(answer.num_digits); + answer.num_digits -= uint32_t(trailing_zeros); + } + if(answer.num_digits > max_digits ) { + answer.num_digits = max_digits; + answer.truncated = true; + } + if ((p != end) && (('e' == *p) || ('E' == *p))) { + ++p; + if(p == end) { return answer; } // should never happen + bool neg_exp = false; + if ('-' == *p) { + neg_exp = true; + ++p; + } else if ('+' == *p) { + ++p; + } + int32_t exp_number = 0; // exponential part + while ((p != end) && is_integer(*p)) { + uint8_t digit = uint8_t(*p - '0'); + if (exp_number < 0x10000) { + exp_number = 10 * exp_number + digit; + } + ++p; + } + answer.decimal_point += (neg_exp ? 
-exp_number : exp_number); + } + return answer; +} + +namespace { + +// remove all final zeroes +inline void trim(decimal &h) { + while ((h.num_digits > 0) && (h.digits[h.num_digits - 1] == 0)) { + h.num_digits--; + } +} + +uint32_t number_of_digits_decimal_left_shift(decimal &h, uint32_t shift) { + shift &= 63; + const static uint16_t number_of_digits_decimal_left_shift_table[65] = { + 0x0000, 0x0800, 0x0801, 0x0803, 0x1006, 0x1009, 0x100D, 0x1812, 0x1817, + 0x181D, 0x2024, 0x202B, 0x2033, 0x203C, 0x2846, 0x2850, 0x285B, 0x3067, + 0x3073, 0x3080, 0x388E, 0x389C, 0x38AB, 0x38BB, 0x40CC, 0x40DD, 0x40EF, + 0x4902, 0x4915, 0x4929, 0x513E, 0x5153, 0x5169, 0x5180, 0x5998, 0x59B0, + 0x59C9, 0x61E3, 0x61FD, 0x6218, 0x6A34, 0x6A50, 0x6A6D, 0x6A8B, 0x72AA, + 0x72C9, 0x72E9, 0x7B0A, 0x7B2B, 0x7B4D, 0x8370, 0x8393, 0x83B7, 0x83DC, + 0x8C02, 0x8C28, 0x8C4F, 0x9477, 0x949F, 0x94C8, 0x9CF2, 0x051C, 0x051C, + 0x051C, 0x051C, + }; + uint32_t x_a = number_of_digits_decimal_left_shift_table[shift]; + uint32_t x_b = number_of_digits_decimal_left_shift_table[shift + 1]; + uint32_t num_new_digits = x_a >> 11; + uint32_t pow5_a = 0x7FF & x_a; + uint32_t pow5_b = 0x7FF & x_b; + const static uint8_t + number_of_digits_decimal_left_shift_table_powers_of_5[0x051C] = { + 5, 2, 5, 1, 2, 5, 6, 2, 5, 3, 1, 2, 5, 1, 5, 6, 2, 5, 7, 8, 1, 2, 5, + 3, 9, 0, 6, 2, 5, 1, 9, 5, 3, 1, 2, 5, 9, 7, 6, 5, 6, 2, 5, 4, 8, 8, + 2, 8, 1, 2, 5, 2, 4, 4, 1, 4, 0, 6, 2, 5, 1, 2, 2, 0, 7, 0, 3, 1, 2, + 5, 6, 1, 0, 3, 5, 1, 5, 6, 2, 5, 3, 0, 5, 1, 7, 5, 7, 8, 1, 2, 5, 1, + 5, 2, 5, 8, 7, 8, 9, 0, 6, 2, 5, 7, 6, 2, 9, 3, 9, 4, 5, 3, 1, 2, 5, + 3, 8, 1, 4, 6, 9, 7, 2, 6, 5, 6, 2, 5, 1, 9, 0, 7, 3, 4, 8, 6, 3, 2, + 8, 1, 2, 5, 9, 5, 3, 6, 7, 4, 3, 1, 6, 4, 0, 6, 2, 5, 4, 7, 6, 8, 3, + 7, 1, 5, 8, 2, 0, 3, 1, 2, 5, 2, 3, 8, 4, 1, 8, 5, 7, 9, 1, 0, 1, 5, + 6, 2, 5, 1, 1, 9, 2, 0, 9, 2, 8, 9, 5, 5, 0, 7, 8, 1, 2, 5, 5, 9, 6, + 0, 4, 6, 4, 4, 7, 7, 5, 3, 9, 0, 6, 2, 5, 2, 9, 8, 0, 2, 3, 2, 2, 3, + 8, 7, 6, 9, 5, 3, 1, 2, 
5, 1, 4, 9, 0, 1, 1, 6, 1, 1, 9, 3, 8, 4, 7, + 6, 5, 6, 2, 5, 7, 4, 5, 0, 5, 8, 0, 5, 9, 6, 9, 2, 3, 8, 2, 8, 1, 2, + 5, 3, 7, 2, 5, 2, 9, 0, 2, 9, 8, 4, 6, 1, 9, 1, 4, 0, 6, 2, 5, 1, 8, + 6, 2, 6, 4, 5, 1, 4, 9, 2, 3, 0, 9, 5, 7, 0, 3, 1, 2, 5, 9, 3, 1, 3, + 2, 2, 5, 7, 4, 6, 1, 5, 4, 7, 8, 5, 1, 5, 6, 2, 5, 4, 6, 5, 6, 6, 1, + 2, 8, 7, 3, 0, 7, 7, 3, 9, 2, 5, 7, 8, 1, 2, 5, 2, 3, 2, 8, 3, 0, 6, + 4, 3, 6, 5, 3, 8, 6, 9, 6, 2, 8, 9, 0, 6, 2, 5, 1, 1, 6, 4, 1, 5, 3, + 2, 1, 8, 2, 6, 9, 3, 4, 8, 1, 4, 4, 5, 3, 1, 2, 5, 5, 8, 2, 0, 7, 6, + 6, 0, 9, 1, 3, 4, 6, 7, 4, 0, 7, 2, 2, 6, 5, 6, 2, 5, 2, 9, 1, 0, 3, + 8, 3, 0, 4, 5, 6, 7, 3, 3, 7, 0, 3, 6, 1, 3, 2, 8, 1, 2, 5, 1, 4, 5, + 5, 1, 9, 1, 5, 2, 2, 8, 3, 6, 6, 8, 5, 1, 8, 0, 6, 6, 4, 0, 6, 2, 5, + 7, 2, 7, 5, 9, 5, 7, 6, 1, 4, 1, 8, 3, 4, 2, 5, 9, 0, 3, 3, 2, 0, 3, + 1, 2, 5, 3, 6, 3, 7, 9, 7, 8, 8, 0, 7, 0, 9, 1, 7, 1, 2, 9, 5, 1, 6, + 6, 0, 1, 5, 6, 2, 5, 1, 8, 1, 8, 9, 8, 9, 4, 0, 3, 5, 4, 5, 8, 5, 6, + 4, 7, 5, 8, 3, 0, 0, 7, 8, 1, 2, 5, 9, 0, 9, 4, 9, 4, 7, 0, 1, 7, 7, + 2, 9, 2, 8, 2, 3, 7, 9, 1, 5, 0, 3, 9, 0, 6, 2, 5, 4, 5, 4, 7, 4, 7, + 3, 5, 0, 8, 8, 6, 4, 6, 4, 1, 1, 8, 9, 5, 7, 5, 1, 9, 5, 3, 1, 2, 5, + 2, 2, 7, 3, 7, 3, 6, 7, 5, 4, 4, 3, 2, 3, 2, 0, 5, 9, 4, 7, 8, 7, 5, + 9, 7, 6, 5, 6, 2, 5, 1, 1, 3, 6, 8, 6, 8, 3, 7, 7, 2, 1, 6, 1, 6, 0, + 2, 9, 7, 3, 9, 3, 7, 9, 8, 8, 2, 8, 1, 2, 5, 5, 6, 8, 4, 3, 4, 1, 8, + 8, 6, 0, 8, 0, 8, 0, 1, 4, 8, 6, 9, 6, 8, 9, 9, 4, 1, 4, 0, 6, 2, 5, + 2, 8, 4, 2, 1, 7, 0, 9, 4, 3, 0, 4, 0, 4, 0, 0, 7, 4, 3, 4, 8, 4, 4, + 9, 7, 0, 7, 0, 3, 1, 2, 5, 1, 4, 2, 1, 0, 8, 5, 4, 7, 1, 5, 2, 0, 2, + 0, 0, 3, 7, 1, 7, 4, 2, 2, 4, 8, 5, 3, 5, 1, 5, 6, 2, 5, 7, 1, 0, 5, + 4, 2, 7, 3, 5, 7, 6, 0, 1, 0, 0, 1, 8, 5, 8, 7, 1, 1, 2, 4, 2, 6, 7, + 5, 7, 8, 1, 2, 5, 3, 5, 5, 2, 7, 1, 3, 6, 7, 8, 8, 0, 0, 5, 0, 0, 9, + 2, 9, 3, 5, 5, 6, 2, 1, 3, 3, 7, 8, 9, 0, 6, 2, 5, 1, 7, 7, 6, 3, 5, + 6, 8, 3, 9, 4, 0, 0, 2, 5, 0, 4, 6, 4, 6, 7, 7, 8, 1, 0, 6, 6, 8, 9, + 4, 5, 3, 1, 2, 5, 8, 8, 8, 1, 7, 8, 
4, 1, 9, 7, 0, 0, 1, 2, 5, 2, 3, + 2, 3, 3, 8, 9, 0, 5, 3, 3, 4, 4, 7, 2, 6, 5, 6, 2, 5, 4, 4, 4, 0, 8, + 9, 2, 0, 9, 8, 5, 0, 0, 6, 2, 6, 1, 6, 1, 6, 9, 4, 5, 2, 6, 6, 7, 2, + 3, 6, 3, 2, 8, 1, 2, 5, 2, 2, 2, 0, 4, 4, 6, 0, 4, 9, 2, 5, 0, 3, 1, + 3, 0, 8, 0, 8, 4, 7, 2, 6, 3, 3, 3, 6, 1, 8, 1, 6, 4, 0, 6, 2, 5, 1, + 1, 1, 0, 2, 2, 3, 0, 2, 4, 6, 2, 5, 1, 5, 6, 5, 4, 0, 4, 2, 3, 6, 3, + 1, 6, 6, 8, 0, 9, 0, 8, 2, 0, 3, 1, 2, 5, 5, 5, 5, 1, 1, 1, 5, 1, 2, + 3, 1, 2, 5, 7, 8, 2, 7, 0, 2, 1, 1, 8, 1, 5, 8, 3, 4, 0, 4, 5, 4, 1, + 0, 1, 5, 6, 2, 5, 2, 7, 7, 5, 5, 5, 7, 5, 6, 1, 5, 6, 2, 8, 9, 1, 3, + 5, 1, 0, 5, 9, 0, 7, 9, 1, 7, 0, 2, 2, 7, 0, 5, 0, 7, 8, 1, 2, 5, 1, + 3, 8, 7, 7, 7, 8, 7, 8, 0, 7, 8, 1, 4, 4, 5, 6, 7, 5, 5, 2, 9, 5, 3, + 9, 5, 8, 5, 1, 1, 3, 5, 2, 5, 3, 9, 0, 6, 2, 5, 6, 9, 3, 8, 8, 9, 3, + 9, 0, 3, 9, 0, 7, 2, 2, 8, 3, 7, 7, 6, 4, 7, 6, 9, 7, 9, 2, 5, 5, 6, + 7, 6, 2, 6, 9, 5, 3, 1, 2, 5, 3, 4, 6, 9, 4, 4, 6, 9, 5, 1, 9, 5, 3, + 6, 1, 4, 1, 8, 8, 8, 2, 3, 8, 4, 8, 9, 6, 2, 7, 8, 3, 8, 1, 3, 4, 7, + 6, 5, 6, 2, 5, 1, 7, 3, 4, 7, 2, 3, 4, 7, 5, 9, 7, 6, 8, 0, 7, 0, 9, + 4, 4, 1, 1, 9, 2, 4, 4, 8, 1, 3, 9, 1, 9, 0, 6, 7, 3, 8, 2, 8, 1, 2, + 5, 8, 6, 7, 3, 6, 1, 7, 3, 7, 9, 8, 8, 4, 0, 3, 5, 4, 7, 2, 0, 5, 9, + 6, 2, 2, 4, 0, 6, 9, 5, 9, 5, 3, 3, 6, 9, 1, 4, 0, 6, 2, 5, + }; + const uint8_t *pow5 = + &number_of_digits_decimal_left_shift_table_powers_of_5[pow5_a]; + uint32_t i = 0; + uint32_t n = pow5_b - pow5_a; + for (; i < n; i++) { + if (i >= h.num_digits) { + return num_new_digits - 1; + } else if (h.digits[i] == pow5[i]) { + continue; + } else if (h.digits[i] < pow5[i]) { + return num_new_digits - 1; + } else { + return num_new_digits; + } + } + return num_new_digits; +} + +} // end of anonymous namespace + +uint64_t round(decimal &h) { + if ((h.num_digits == 0) || (h.decimal_point < 0)) { + return 0; + } else if (h.decimal_point > 18) { + return UINT64_MAX; + } + // at this point, we know that h.decimal_point >= 0 + uint32_t dp = 
uint32_t(h.decimal_point); + uint64_t n = 0; + for (uint32_t i = 0; i < dp; i++) { + n = (10 * n) + ((i < h.num_digits) ? h.digits[i] : 0); + } + bool round_up = false; + if (dp < h.num_digits) { + round_up = h.digits[dp] >= 5; // normally, we round up + // but we may need to round to even! + if ((h.digits[dp] == 5) && (dp + 1 == h.num_digits)) { + round_up = h.truncated || ((dp > 0) && (1 & h.digits[dp - 1])); + } + } + if (round_up) { + n++; + } + return n; +} + +// computes h * 2^-shift +void decimal_left_shift(decimal &h, uint32_t shift) { + if (h.num_digits == 0) { + return; + } + uint32_t num_new_digits = number_of_digits_decimal_left_shift(h, shift); + int32_t read_index = int32_t(h.num_digits - 1); + uint32_t write_index = h.num_digits - 1 + num_new_digits; + uint64_t n = 0; + + while (read_index >= 0) { + n += uint64_t(h.digits[read_index]) << shift; + uint64_t quotient = n / 10; + uint64_t remainder = n - (10 * quotient); + if (write_index < max_digits) { + h.digits[write_index] = uint8_t(remainder); + } else if (remainder > 0) { + h.truncated = true; + } + n = quotient; + write_index--; + read_index--; + } + while (n > 0) { + uint64_t quotient = n / 10; + uint64_t remainder = n - (10 * quotient); + if (write_index < max_digits) { + h.digits[write_index] = uint8_t(remainder); + } else if (remainder > 0) { + h.truncated = true; + } + n = quotient; + write_index--; + } + h.num_digits += num_new_digits; + if (h.num_digits > max_digits) { + h.num_digits = max_digits; + } + h.decimal_point += int32_t(num_new_digits); + trim(h); +} + +// computes h * 2^shift +void decimal_right_shift(decimal &h, uint32_t shift) { + uint32_t read_index = 0; + uint32_t write_index = 0; + + uint64_t n = 0; + + while ((n >> shift) == 0) { + if (read_index < h.num_digits) { + n = (10 * n) + h.digits[read_index++]; + } else if (n == 0) { + return; + } else { + while ((n >> shift) == 0) { + n = 10 * n; + read_index++; + } + break; + } + } + h.decimal_point -= int32_t(read_index - 1); 
+ if (h.decimal_point < -decimal_point_range) { // it is zero + h.num_digits = 0; + h.decimal_point = 0; + h.negative = false; + h.truncated = false; + return; + } + uint64_t mask = (uint64_t(1) << shift) - 1; + while (read_index < h.num_digits) { + uint8_t new_digit = uint8_t(n >> shift); + n = (10 * (n & mask)) + h.digits[read_index++]; + h.digits[write_index++] = new_digit; + } + while (n > 0) { + uint8_t new_digit = uint8_t(n >> shift); + n = 10 * (n & mask); + if (write_index < max_digits) { + h.digits[write_index++] = new_digit; + } else if (new_digit > 0) { + h.truncated = true; + } + } + h.num_digits = write_index; + trim(h); +} + +template adjusted_mantissa compute_float(decimal &d) { + adjusted_mantissa answer; + if (d.num_digits == 0) { + // should be zero + answer.power2 = 0; + answer.mantissa = 0; + return answer; + } + // At this point, going further, we can assume that d.num_digits > 0. + // We want to guard against excessive decimal point values because + // they can result in long running times. Indeed, we do + // shifts by at most 60 bits. We have that log(10**400)/log(2**60) ~= 22 + // which is fine, but log(10**299995)/log(2**60) ~= 16609 which is not + // fine (runs for a long time). + // + if(d.decimal_point < -324) { + // We have something smaller than 1e-324 which is always zero + // in binary64 and binary32. + // It should be zero. + answer.power2 = 0; + answer.mantissa = 0; + return answer; + } else if(d.decimal_point >= 310) { + // We have something at least as large as 0.1e310 which is + // always infinite. + answer.power2 = binary::infinite_power(); + answer.mantissa = 0; + return answer; + } + + static const uint32_t max_shift = 60; + static const uint32_t num_powers = 19; + static const uint8_t powers[19] = { + 0, 3, 6, 9, 13, 16, 19, 23, 26, 29, // + 33, 36, 39, 43, 46, 49, 53, 56, 59, // + }; + int32_t exp2 = 0; + while (d.decimal_point > 0) { + uint32_t n = uint32_t(d.decimal_point); + uint32_t shift = (n < num_powers) ? 
powers[n] : max_shift; + decimal_right_shift(d, shift); + if (d.decimal_point < -decimal_point_range) { + // should be zero + answer.power2 = 0; + answer.mantissa = 0; + return answer; + } + exp2 += int32_t(shift); + } + // We shift left toward [1/2 ... 1]. + while (d.decimal_point <= 0) { + uint32_t shift; + if (d.decimal_point == 0) { + if (d.digits[0] >= 5) { + break; + } + shift = (d.digits[0] < 2) ? 2 : 1; + } else { + uint32_t n = uint32_t(-d.decimal_point); + shift = (n < num_powers) ? powers[n] : max_shift; + } + decimal_left_shift(d, shift); + if (d.decimal_point > decimal_point_range) { + // we want to get infinity: + answer.power2 = 0xFF; + answer.mantissa = 0; + return answer; + } + exp2 -= int32_t(shift); + } + // We are now in the range [1/2 ... 1] but the binary format uses [1 ... 2]. + exp2--; + constexpr int32_t minimum_exponent = binary::minimum_exponent(); + while ((minimum_exponent + 1) > exp2) { + uint32_t n = uint32_t((minimum_exponent + 1) - exp2); + if (n > max_shift) { + n = max_shift; + } + decimal_right_shift(d, n); + exp2 += int32_t(n); + } + if ((exp2 - minimum_exponent) >= binary::infinite_power()) { + answer.power2 = binary::infinite_power(); + answer.mantissa = 0; + return answer; + } + + const int mantissa_size_in_bits = binary::mantissa_explicit_bits() + 1; + decimal_left_shift(d, mantissa_size_in_bits); + + uint64_t mantissa = round(d); + // It is possible that we have an overflow, in which case we need + // to shift back. 
+ if (mantissa >= (uint64_t(1) << mantissa_size_in_bits)) { + decimal_right_shift(d, 1); + exp2 += 1; + mantissa = round(d); + if ((exp2 - minimum_exponent) >= binary::infinite_power()) { + answer.power2 = binary::infinite_power(); + answer.mantissa = 0; + return answer; + } + } + answer.power2 = exp2 - binary::minimum_exponent(); + if (mantissa < (uint64_t(1) << binary::mantissa_explicit_bits())) { + answer.power2--; + } + answer.mantissa = + mantissa & ((uint64_t(1) << binary::mantissa_explicit_bits()) - 1); + return answer; +} + +template +adjusted_mantissa parse_long_mantissa(const char *first) { + decimal d = parse_decimal(first); + return compute_float(d); +} + +template +adjusted_mantissa parse_long_mantissa(const char *first, const char *end) { + decimal d = parse_decimal(first, end); + return compute_float(d); +} + +double from_chars(const char *first) noexcept { + bool negative = first[0] == '-'; + if (negative) { + first++; + } + adjusted_mantissa am = parse_long_mantissa>(first); + uint64_t word = am.mantissa; + word |= uint64_t(am.power2) + << binary_format::mantissa_explicit_bits(); + word = negative ? word | (uint64_t(1) << binary_format::sign_index()) + : word; + double value; + std::memcpy(&value, &word, sizeof(double)); + return value; +} + + +double from_chars(const char *first, const char *end) noexcept { + bool negative = first[0] == '-'; + if (negative) { + first++; + } + adjusted_mantissa am = parse_long_mantissa>(first, end); + uint64_t word = am.mantissa; + word |= uint64_t(am.power2) + << binary_format::mantissa_explicit_bits(); + word = negative ? 
word | (uint64_t(1) << binary_format::sign_index()) + : word; + double value; + std::memcpy(&value, &word, sizeof(double)); + return value; +} + +} // internal +} // simdjson + +#endif // SIMDJSON_SRC_FROM_CHARS_CPP +/* end file from_chars.cpp */ +/* including internal/error_tables.cpp: #include */ +/* begin file internal/error_tables.cpp */ +#ifndef SIMDJSON_SRC_ERROR_TABLES_CPP +#define SIMDJSON_SRC_ERROR_TABLES_CPP + +/* including simdjson/internal/jsoncharutils_tables.h: #include */ +/* begin file simdjson/internal/jsoncharutils_tables.h */ +#ifndef SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H +#define SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H + +/* skipped duplicate #include "simdjson/base.h" */ + +#ifdef JSON_TEST_STRINGS +void found_string(const uint8_t *buf, const uint8_t *parsed_begin, + const uint8_t *parsed_end); +void found_bad_string(const uint8_t *buf); +#endif + +namespace simdjson { +namespace internal { +// structural chars here are +// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c (and NULL) +// we are also interested in the four whitespace characters +// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d + +extern SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace_negated[256]; +extern SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace[256]; +extern SIMDJSON_DLLIMPORTEXPORT const uint32_t digit_to_val32[886]; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H +/* end file simdjson/internal/jsoncharutils_tables.h */ +/* including simdjson/error-inl.h: #include */ +/* begin file simdjson/error-inl.h */ +#ifndef SIMDJSON_ERROR_INL_H +#define SIMDJSON_ERROR_INL_H + +/* skipped duplicate #include "simdjson/error.h" */ + +#include + +namespace simdjson { +namespace internal { + // We store the error code so we can validate the error message is associated with the right code + struct error_code_info { + error_code code; + const char* message; // do not use a 
fancy std::string where a simple C string will do (no alloc, no destructor) + }; + // These MUST match the codes in error_code. We check this constraint in basictests. + extern SIMDJSON_DLLIMPORTEXPORT const error_code_info error_codes[]; +} // namespace internal + + +inline const char *error_message(error_code error) noexcept { + // If you're using error_code, we're trusting you got it from the enum. + return internal::error_codes[int(error)].message; +} + +// deprecated function +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +inline const std::string error_message(int error) noexcept { + if (error < 0 || error >= error_code::NUM_ERROR_CODES) { + return internal::error_codes[UNEXPECTED_ERROR].message; + } + return internal::error_codes[error].message; +} +#endif // SIMDJSON_DISABLE_DEPRECATED_API + +inline std::ostream& operator<<(std::ostream& out, error_code error) noexcept { + return out << error_message(error); +} + +namespace internal { + +// +// internal::simdjson_result_base inline implementation +// + +template +simdjson_inline void simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return 
std::forward(this->first); +} + +template +simdjson_inline simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T&& simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline simdjson_result_base::simdjson_result_base(T &&value, error_code error) noexcept + : std::pair(std::forward(value), error) {} +template +simdjson_inline simdjson_result_base::simdjson_result_base(error_code error) noexcept + : simdjson_result_base(T{}, error) {} +template +simdjson_inline simdjson_result_base::simdjson_result_base(T &&value) noexcept + : simdjson_result_base(std::forward(value), SUCCESS) {} +template +simdjson_inline simdjson_result_base::simdjson_result_base() noexcept + : simdjson_result_base(T{}, UNINITIALIZED) {} + +} // namespace internal + +/// +/// simdjson_result inline implementation +/// + +template +simdjson_inline void simdjson_result::tie(T &value, error_code &error) && noexcept { + std::forward>(*this).tie(value, error); +} + +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &value) && noexcept { + return std::forward>(*this).get(value); +} + +template +simdjson_inline error_code simdjson_result::error() const noexcept { + return internal::simdjson_result_base::error(); +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& simdjson_result::value() & noexcept(false) { + return internal::simdjson_result_base::value(); +} + +template +simdjson_inline T&& simdjson_result::value() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T&& simdjson_result::take_value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline simdjson_result::operator T&&() && 
noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& simdjson_result::value_unsafe() const& noexcept { + return internal::simdjson_result_base::value_unsafe(); +} + +template +simdjson_inline T&& simdjson_result::value_unsafe() && noexcept { + return std::forward>(*this).value_unsafe(); +} + +template +simdjson_inline simdjson_result::simdjson_result(T &&value, error_code error) noexcept + : internal::simdjson_result_base(std::forward(value), error) {} +template +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : internal::simdjson_result_base(error) {} +template +simdjson_inline simdjson_result::simdjson_result(T &&value) noexcept + : internal::simdjson_result_base(std::forward(value)) {} +template +simdjson_inline simdjson_result::simdjson_result() noexcept + : internal::simdjson_result_base() {} + +} // namespace simdjson + +#endif // SIMDJSON_ERROR_INL_H +/* end file simdjson/error-inl.h */ + +namespace simdjson { +namespace internal { + + SIMDJSON_DLLIMPORTEXPORT const error_code_info error_codes[] { + { SUCCESS, "SUCCESS: No error" }, + { CAPACITY, "CAPACITY: This parser can't support a document that big" }, + { MEMALLOC, "MEMALLOC: Error allocating memory, we're most likely out of memory" }, + { TAPE_ERROR, "TAPE_ERROR: The JSON document has an improper structure: missing or superfluous commas, braces, missing keys, etc." 
}, + { DEPTH_ERROR, "DEPTH_ERROR: The JSON document was too deep (too many nested objects and arrays)" }, + { STRING_ERROR, "STRING_ERROR: Problem while parsing a string" }, + { T_ATOM_ERROR, "T_ATOM_ERROR: Problem while parsing an atom starting with the letter 't'" }, + { F_ATOM_ERROR, "F_ATOM_ERROR: Problem while parsing an atom starting with the letter 'f'" }, + { N_ATOM_ERROR, "N_ATOM_ERROR: Problem while parsing an atom starting with the letter 'n'" }, + { NUMBER_ERROR, "NUMBER_ERROR: Problem while parsing a number" }, + { BIGINT_ERROR, "BIGINT_ERROR: Big integer value that cannot be represented using 64 bits" }, + { UTF8_ERROR, "UTF8_ERROR: The input is not valid UTF-8" }, + { UNINITIALIZED, "UNINITIALIZED: Uninitialized" }, + { EMPTY, "EMPTY: no JSON found" }, + { UNESCAPED_CHARS, "UNESCAPED_CHARS: Within strings, some characters must be escaped, we found unescaped characters" }, + { UNCLOSED_STRING, "UNCLOSED_STRING: A string is opened, but never closed." }, + { UNSUPPORTED_ARCHITECTURE, "UNSUPPORTED_ARCHITECTURE: simdjson does not have an implementation supported by this CPU architecture. Please report this error to the core team as it should never happen." }, + { INCORRECT_TYPE, "INCORRECT_TYPE: The JSON element does not have the requested type." }, + { NUMBER_OUT_OF_RANGE, "NUMBER_OUT_OF_RANGE: The JSON number is too large or too small to fit within the requested type." }, + { INDEX_OUT_OF_BOUNDS, "INDEX_OUT_OF_BOUNDS: Attempted to access an element of a JSON array that is beyond its length." }, + { NO_SUCH_FIELD, "NO_SUCH_FIELD: The JSON field referenced does not exist in this object." }, + { IO_ERROR, "IO_ERROR: Error reading the file." }, + { INVALID_JSON_POINTER, "INVALID_JSON_POINTER: Invalid JSON pointer syntax." }, + { INVALID_URI_FRAGMENT, "INVALID_URI_FRAGMENT: Invalid URI fragment syntax." 
}, + { UNEXPECTED_ERROR, "UNEXPECTED_ERROR: Unexpected error, consider reporting this problem as you may have found a bug in simdjson" }, + { PARSER_IN_USE, "PARSER_IN_USE: Cannot parse a new document while a document is still in use." }, + { OUT_OF_ORDER_ITERATION, "OUT_OF_ORDER_ITERATION: Objects and arrays can only be iterated when they are first encountered." }, + { INSUFFICIENT_PADDING, "INSUFFICIENT_PADDING: simdjson requires the input JSON string to have at least SIMDJSON_PADDING extra bytes allocated, beyond the string's length. Consider using the simdjson::padded_string class if needed." }, + { INCOMPLETE_ARRAY_OR_OBJECT, "INCOMPLETE_ARRAY_OR_OBJECT: JSON document ended early in the middle of an object or array." }, + { SCALAR_DOCUMENT_AS_VALUE, "SCALAR_DOCUMENT_AS_VALUE: A JSON document made of a scalar (number, Boolean, null or string) is treated as a value. Use get_bool(), get_double(), etc. on the document instead. "}, + { OUT_OF_BOUNDS, "OUT_OF_BOUNDS: Attempt to access location outside of document."}, + { TRAILING_CONTENT, "TRAILING_CONTENT: Unexpected trailing content in the JSON input."} + }; // error_messages[] + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_SRC_ERROR_TABLES_CPP +/* end file internal/error_tables.cpp */ +/* including internal/jsoncharutils_tables.cpp: #include */ +/* begin file internal/jsoncharutils_tables.cpp */ +#ifndef SIMDJSON_SRC_JSONCHARUTILS_TABLES_CPP +#define SIMDJSON_SRC_JSONCHARUTILS_TABLES_CPP + +/* skipped duplicate #include */ + +namespace simdjson { +namespace internal { + +// structural chars here are +// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c (and NULL) +// we are also interested in the four whitespace characters +// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d + +SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace_negated[256] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + +SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + +SIMDJSON_DLLIMPORTEXPORT const uint32_t digit_to_val32[886] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, + 0x6, 0x7, 0x8, 0x9, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xa, + 0xb, 0xc, 0xd, 0xe, 0xf, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xa, 0xb, 0xc, 0xd, 0xe, + 0xf, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0x0, 0x10, 0x20, 0x30, 0x40, 0x50, + 0x60, 0x70, 0x80, 0x90, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xa0, + 0xb0, 0xc0, 0xd0, 0xe0, 0xf0, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, + 0xf0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 
0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0x0, 0x100, 0x200, 0x300, 0x400, 0x500, + 0x600, 0x700, 0x800, 0x900, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xa00, + 0xb00, 0xc00, 0xd00, 0xe00, 0xf00, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xa00, 0xb00, 0xc00, 0xd00, 0xe00, + 0xf00, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0x0, 0x1000, 0x2000, 0x3000, 0x4000, 0x5000, + 0x6000, 0x7000, 0x8000, 0x9000, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xa000, + 0xb000, 0xc000, 0xd000, 0xe000, 0xf000, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xa000, 0xb000, 0xc000, 0xd000, 0xe000, + 0xf000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_SRC_JSONCHARUTILS_TABLES_CPP +/* end file internal/jsoncharutils_tables.cpp */ +/* including internal/numberparsing_tables.cpp: #include */ +/* begin file internal/numberparsing_tables.cpp */ +#ifndef SIMDJSON_SRC_NUMBERPARSING_TABLES_CPP +#define SIMDJSON_SRC_NUMBERPARSING_TABLES_CPP + +/* skipped duplicate #include */ +/* including simdjson/internal/numberparsing_tables.h: #include */ +/* begin file simdjson/internal/numberparsing_tables.h */ +#ifndef SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H +#define SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H + +/* skipped duplicate #include "simdjson/base.h" */ + +namespace simdjson { +namespace internal { +/** + * The smallest non-zero float (binary64) is 2^-1074. + * We take as input numbers of the form w x 10^q where w < 2^64. + * We have that w * 10^-343 < 2^(64-344) 5^-343 < 2^-1076. + * However, we have that + * (2^64-1) * 10^-342 = (2^64-1) * 2^-342 * 5^-342 > 2^-1074. + * Thus it is possible for a number of the form w * 10^-342 where + * w is a 64-bit value to be a non-zero floating-point number. + ********* + * Any number of form w * 10^309 where w>= 1 is going to be + * infinite in binary64 so we never need to worry about powers + * of 5 greater than 308. + */ +constexpr int smallest_power = -342; +constexpr int largest_power = 308; + +/** + * Represents a 128-bit value. + * low: least significant 64 bits. + * high: most significant 64 bits. + */ +struct value128 { + uint64_t low; + uint64_t high; +}; + + +// Precomputed powers of ten from 10^0 to 10^22. These +// can be represented exactly using the double type. 
+extern SIMDJSON_DLLIMPORTEXPORT const double power_of_ten[]; + + +/** + * When mapping numbers from decimal to binary, + * we go from w * 10^q to m * 2^p but we have + * 10^q = 5^q * 2^q, so effectively + * we are trying to match + * w * 2^q * 5^q to m * 2^p. Thus the powers of two + * are not a concern since they can be represented + * exactly using the binary notation, only the powers of five + * affect the binary significand. + */ + + +// The truncated powers of five from 5^-342 all the way to 5^308 +// The mantissa is truncated to 128 bits, and +// never rounded up. Uses about 10KB. +extern SIMDJSON_DLLIMPORTEXPORT const uint64_t power_of_five_128[]; +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H +/* end file simdjson/internal/numberparsing_tables.h */ + +// Precomputed powers of ten from 10^0 to 10^22. These +// can be represented exactly using the double type. +SIMDJSON_DLLIMPORTEXPORT const double simdjson::internal::power_of_ten[] = { + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, + 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22}; + +/** + * When mapping numbers from decimal to binary, + * we go from w * 10^q to m * 2^p but we have + * 10^q = 5^q * 2^q, so effectively + * we are trying to match + * w * 2^q * 5^q to m * 2^p. Thus the powers of two + * are not a concern since they can be represented + * exactly using the binary notation, only the powers of five + * affect the binary significand. + */ + + +// The truncated powers of five from 5^-342 all the way to 5^308 +// The mantissa is truncated to 128 bits, and +// never rounded up. Uses about 10KB. 
+SIMDJSON_DLLIMPORTEXPORT const uint64_t simdjson::internal::power_of_five_128[]= { + 0xeef453d6923bd65a,0x113faa2906a13b3f, + 0x9558b4661b6565f8,0x4ac7ca59a424c507, + 0xbaaee17fa23ebf76,0x5d79bcf00d2df649, + 0xe95a99df8ace6f53,0xf4d82c2c107973dc, + 0x91d8a02bb6c10594,0x79071b9b8a4be869, + 0xb64ec836a47146f9,0x9748e2826cdee284, + 0xe3e27a444d8d98b7,0xfd1b1b2308169b25, + 0x8e6d8c6ab0787f72,0xfe30f0f5e50e20f7, + 0xb208ef855c969f4f,0xbdbd2d335e51a935, + 0xde8b2b66b3bc4723,0xad2c788035e61382, + 0x8b16fb203055ac76,0x4c3bcb5021afcc31, + 0xaddcb9e83c6b1793,0xdf4abe242a1bbf3d, + 0xd953e8624b85dd78,0xd71d6dad34a2af0d, + 0x87d4713d6f33aa6b,0x8672648c40e5ad68, + 0xa9c98d8ccb009506,0x680efdaf511f18c2, + 0xd43bf0effdc0ba48,0x212bd1b2566def2, + 0x84a57695fe98746d,0x14bb630f7604b57, + 0xa5ced43b7e3e9188,0x419ea3bd35385e2d, + 0xcf42894a5dce35ea,0x52064cac828675b9, + 0x818995ce7aa0e1b2,0x7343efebd1940993, + 0xa1ebfb4219491a1f,0x1014ebe6c5f90bf8, + 0xca66fa129f9b60a6,0xd41a26e077774ef6, + 0xfd00b897478238d0,0x8920b098955522b4, + 0x9e20735e8cb16382,0x55b46e5f5d5535b0, + 0xc5a890362fddbc62,0xeb2189f734aa831d, + 0xf712b443bbd52b7b,0xa5e9ec7501d523e4, + 0x9a6bb0aa55653b2d,0x47b233c92125366e, + 0xc1069cd4eabe89f8,0x999ec0bb696e840a, + 0xf148440a256e2c76,0xc00670ea43ca250d, + 0x96cd2a865764dbca,0x380406926a5e5728, + 0xbc807527ed3e12bc,0xc605083704f5ecf2, + 0xeba09271e88d976b,0xf7864a44c633682e, + 0x93445b8731587ea3,0x7ab3ee6afbe0211d, + 0xb8157268fdae9e4c,0x5960ea05bad82964, + 0xe61acf033d1a45df,0x6fb92487298e33bd, + 0x8fd0c16206306bab,0xa5d3b6d479f8e056, + 0xb3c4f1ba87bc8696,0x8f48a4899877186c, + 0xe0b62e2929aba83c,0x331acdabfe94de87, + 0x8c71dcd9ba0b4925,0x9ff0c08b7f1d0b14, + 0xaf8e5410288e1b6f,0x7ecf0ae5ee44dd9, + 0xdb71e91432b1a24a,0xc9e82cd9f69d6150, + 0x892731ac9faf056e,0xbe311c083a225cd2, + 0xab70fe17c79ac6ca,0x6dbd630a48aaf406, + 0xd64d3d9db981787d,0x92cbbccdad5b108, + 0x85f0468293f0eb4e,0x25bbf56008c58ea5, + 0xa76c582338ed2621,0xaf2af2b80af6f24e, + 
0xd1476e2c07286faa,0x1af5af660db4aee1, + 0x82cca4db847945ca,0x50d98d9fc890ed4d, + 0xa37fce126597973c,0xe50ff107bab528a0, + 0xcc5fc196fefd7d0c,0x1e53ed49a96272c8, + 0xff77b1fcbebcdc4f,0x25e8e89c13bb0f7a, + 0x9faacf3df73609b1,0x77b191618c54e9ac, + 0xc795830d75038c1d,0xd59df5b9ef6a2417, + 0xf97ae3d0d2446f25,0x4b0573286b44ad1d, + 0x9becce62836ac577,0x4ee367f9430aec32, + 0xc2e801fb244576d5,0x229c41f793cda73f, + 0xf3a20279ed56d48a,0x6b43527578c1110f, + 0x9845418c345644d6,0x830a13896b78aaa9, + 0xbe5691ef416bd60c,0x23cc986bc656d553, + 0xedec366b11c6cb8f,0x2cbfbe86b7ec8aa8, + 0x94b3a202eb1c3f39,0x7bf7d71432f3d6a9, + 0xb9e08a83a5e34f07,0xdaf5ccd93fb0cc53, + 0xe858ad248f5c22c9,0xd1b3400f8f9cff68, + 0x91376c36d99995be,0x23100809b9c21fa1, + 0xb58547448ffffb2d,0xabd40a0c2832a78a, + 0xe2e69915b3fff9f9,0x16c90c8f323f516c, + 0x8dd01fad907ffc3b,0xae3da7d97f6792e3, + 0xb1442798f49ffb4a,0x99cd11cfdf41779c, + 0xdd95317f31c7fa1d,0x40405643d711d583, + 0x8a7d3eef7f1cfc52,0x482835ea666b2572, + 0xad1c8eab5ee43b66,0xda3243650005eecf, + 0xd863b256369d4a40,0x90bed43e40076a82, + 0x873e4f75e2224e68,0x5a7744a6e804a291, + 0xa90de3535aaae202,0x711515d0a205cb36, + 0xd3515c2831559a83,0xd5a5b44ca873e03, + 0x8412d9991ed58091,0xe858790afe9486c2, + 0xa5178fff668ae0b6,0x626e974dbe39a872, + 0xce5d73ff402d98e3,0xfb0a3d212dc8128f, + 0x80fa687f881c7f8e,0x7ce66634bc9d0b99, + 0xa139029f6a239f72,0x1c1fffc1ebc44e80, + 0xc987434744ac874e,0xa327ffb266b56220, + 0xfbe9141915d7a922,0x4bf1ff9f0062baa8, + 0x9d71ac8fada6c9b5,0x6f773fc3603db4a9, + 0xc4ce17b399107c22,0xcb550fb4384d21d3, + 0xf6019da07f549b2b,0x7e2a53a146606a48, + 0x99c102844f94e0fb,0x2eda7444cbfc426d, + 0xc0314325637a1939,0xfa911155fefb5308, + 0xf03d93eebc589f88,0x793555ab7eba27ca, + 0x96267c7535b763b5,0x4bc1558b2f3458de, + 0xbbb01b9283253ca2,0x9eb1aaedfb016f16, + 0xea9c227723ee8bcb,0x465e15a979c1cadc, + 0x92a1958a7675175f,0xbfacd89ec191ec9, + 0xb749faed14125d36,0xcef980ec671f667b, + 0xe51c79a85916f484,0x82b7e12780e7401a, + 
0x8f31cc0937ae58d2,0xd1b2ecb8b0908810, + 0xb2fe3f0b8599ef07,0x861fa7e6dcb4aa15, + 0xdfbdcece67006ac9,0x67a791e093e1d49a, + 0x8bd6a141006042bd,0xe0c8bb2c5c6d24e0, + 0xaecc49914078536d,0x58fae9f773886e18, + 0xda7f5bf590966848,0xaf39a475506a899e, + 0x888f99797a5e012d,0x6d8406c952429603, + 0xaab37fd7d8f58178,0xc8e5087ba6d33b83, + 0xd5605fcdcf32e1d6,0xfb1e4a9a90880a64, + 0x855c3be0a17fcd26,0x5cf2eea09a55067f, + 0xa6b34ad8c9dfc06f,0xf42faa48c0ea481e, + 0xd0601d8efc57b08b,0xf13b94daf124da26, + 0x823c12795db6ce57,0x76c53d08d6b70858, + 0xa2cb1717b52481ed,0x54768c4b0c64ca6e, + 0xcb7ddcdda26da268,0xa9942f5dcf7dfd09, + 0xfe5d54150b090b02,0xd3f93b35435d7c4c, + 0x9efa548d26e5a6e1,0xc47bc5014a1a6daf, + 0xc6b8e9b0709f109a,0x359ab6419ca1091b, + 0xf867241c8cc6d4c0,0xc30163d203c94b62, + 0x9b407691d7fc44f8,0x79e0de63425dcf1d, + 0xc21094364dfb5636,0x985915fc12f542e4, + 0xf294b943e17a2bc4,0x3e6f5b7b17b2939d, + 0x979cf3ca6cec5b5a,0xa705992ceecf9c42, + 0xbd8430bd08277231,0x50c6ff782a838353, + 0xece53cec4a314ebd,0xa4f8bf5635246428, + 0x940f4613ae5ed136,0x871b7795e136be99, + 0xb913179899f68584,0x28e2557b59846e3f, + 0xe757dd7ec07426e5,0x331aeada2fe589cf, + 0x9096ea6f3848984f,0x3ff0d2c85def7621, + 0xb4bca50b065abe63,0xfed077a756b53a9, + 0xe1ebce4dc7f16dfb,0xd3e8495912c62894, + 0x8d3360f09cf6e4bd,0x64712dd7abbbd95c, + 0xb080392cc4349dec,0xbd8d794d96aacfb3, + 0xdca04777f541c567,0xecf0d7a0fc5583a0, + 0x89e42caaf9491b60,0xf41686c49db57244, + 0xac5d37d5b79b6239,0x311c2875c522ced5, + 0xd77485cb25823ac7,0x7d633293366b828b, + 0x86a8d39ef77164bc,0xae5dff9c02033197, + 0xa8530886b54dbdeb,0xd9f57f830283fdfc, + 0xd267caa862a12d66,0xd072df63c324fd7b, + 0x8380dea93da4bc60,0x4247cb9e59f71e6d, + 0xa46116538d0deb78,0x52d9be85f074e608, + 0xcd795be870516656,0x67902e276c921f8b, + 0x806bd9714632dff6,0xba1cd8a3db53b6, + 0xa086cfcd97bf97f3,0x80e8a40eccd228a4, + 0xc8a883c0fdaf7df0,0x6122cd128006b2cd, + 0xfad2a4b13d1b5d6c,0x796b805720085f81, + 0x9cc3a6eec6311a63,0xcbe3303674053bb0, + 
0xc3f490aa77bd60fc,0xbedbfc4411068a9c, + 0xf4f1b4d515acb93b,0xee92fb5515482d44, + 0x991711052d8bf3c5,0x751bdd152d4d1c4a, + 0xbf5cd54678eef0b6,0xd262d45a78a0635d, + 0xef340a98172aace4,0x86fb897116c87c34, + 0x9580869f0e7aac0e,0xd45d35e6ae3d4da0, + 0xbae0a846d2195712,0x8974836059cca109, + 0xe998d258869facd7,0x2bd1a438703fc94b, + 0x91ff83775423cc06,0x7b6306a34627ddcf, + 0xb67f6455292cbf08,0x1a3bc84c17b1d542, + 0xe41f3d6a7377eeca,0x20caba5f1d9e4a93, + 0x8e938662882af53e,0x547eb47b7282ee9c, + 0xb23867fb2a35b28d,0xe99e619a4f23aa43, + 0xdec681f9f4c31f31,0x6405fa00e2ec94d4, + 0x8b3c113c38f9f37e,0xde83bc408dd3dd04, + 0xae0b158b4738705e,0x9624ab50b148d445, + 0xd98ddaee19068c76,0x3badd624dd9b0957, + 0x87f8a8d4cfa417c9,0xe54ca5d70a80e5d6, + 0xa9f6d30a038d1dbc,0x5e9fcf4ccd211f4c, + 0xd47487cc8470652b,0x7647c3200069671f, + 0x84c8d4dfd2c63f3b,0x29ecd9f40041e073, + 0xa5fb0a17c777cf09,0xf468107100525890, + 0xcf79cc9db955c2cc,0x7182148d4066eeb4, + 0x81ac1fe293d599bf,0xc6f14cd848405530, + 0xa21727db38cb002f,0xb8ada00e5a506a7c, + 0xca9cf1d206fdc03b,0xa6d90811f0e4851c, + 0xfd442e4688bd304a,0x908f4a166d1da663, + 0x9e4a9cec15763e2e,0x9a598e4e043287fe, + 0xc5dd44271ad3cdba,0x40eff1e1853f29fd, + 0xf7549530e188c128,0xd12bee59e68ef47c, + 0x9a94dd3e8cf578b9,0x82bb74f8301958ce, + 0xc13a148e3032d6e7,0xe36a52363c1faf01, + 0xf18899b1bc3f8ca1,0xdc44e6c3cb279ac1, + 0x96f5600f15a7b7e5,0x29ab103a5ef8c0b9, + 0xbcb2b812db11a5de,0x7415d448f6b6f0e7, + 0xebdf661791d60f56,0x111b495b3464ad21, + 0x936b9fcebb25c995,0xcab10dd900beec34, + 0xb84687c269ef3bfb,0x3d5d514f40eea742, + 0xe65829b3046b0afa,0xcb4a5a3112a5112, + 0x8ff71a0fe2c2e6dc,0x47f0e785eaba72ab, + 0xb3f4e093db73a093,0x59ed216765690f56, + 0xe0f218b8d25088b8,0x306869c13ec3532c, + 0x8c974f7383725573,0x1e414218c73a13fb, + 0xafbd2350644eeacf,0xe5d1929ef90898fa, + 0xdbac6c247d62a583,0xdf45f746b74abf39, + 0x894bc396ce5da772,0x6b8bba8c328eb783, + 0xab9eb47c81f5114f,0x66ea92f3f326564, + 0xd686619ba27255a2,0xc80a537b0efefebd, + 
0x8613fd0145877585,0xbd06742ce95f5f36, + 0xa798fc4196e952e7,0x2c48113823b73704, + 0xd17f3b51fca3a7a0,0xf75a15862ca504c5, + 0x82ef85133de648c4,0x9a984d73dbe722fb, + 0xa3ab66580d5fdaf5,0xc13e60d0d2e0ebba, + 0xcc963fee10b7d1b3,0x318df905079926a8, + 0xffbbcfe994e5c61f,0xfdf17746497f7052, + 0x9fd561f1fd0f9bd3,0xfeb6ea8bedefa633, + 0xc7caba6e7c5382c8,0xfe64a52ee96b8fc0, + 0xf9bd690a1b68637b,0x3dfdce7aa3c673b0, + 0x9c1661a651213e2d,0x6bea10ca65c084e, + 0xc31bfa0fe5698db8,0x486e494fcff30a62, + 0xf3e2f893dec3f126,0x5a89dba3c3efccfa, + 0x986ddb5c6b3a76b7,0xf89629465a75e01c, + 0xbe89523386091465,0xf6bbb397f1135823, + 0xee2ba6c0678b597f,0x746aa07ded582e2c, + 0x94db483840b717ef,0xa8c2a44eb4571cdc, + 0xba121a4650e4ddeb,0x92f34d62616ce413, + 0xe896a0d7e51e1566,0x77b020baf9c81d17, + 0x915e2486ef32cd60,0xace1474dc1d122e, + 0xb5b5ada8aaff80b8,0xd819992132456ba, + 0xe3231912d5bf60e6,0x10e1fff697ed6c69, + 0x8df5efabc5979c8f,0xca8d3ffa1ef463c1, + 0xb1736b96b6fd83b3,0xbd308ff8a6b17cb2, + 0xddd0467c64bce4a0,0xac7cb3f6d05ddbde, + 0x8aa22c0dbef60ee4,0x6bcdf07a423aa96b, + 0xad4ab7112eb3929d,0x86c16c98d2c953c6, + 0xd89d64d57a607744,0xe871c7bf077ba8b7, + 0x87625f056c7c4a8b,0x11471cd764ad4972, + 0xa93af6c6c79b5d2d,0xd598e40d3dd89bcf, + 0xd389b47879823479,0x4aff1d108d4ec2c3, + 0x843610cb4bf160cb,0xcedf722a585139ba, + 0xa54394fe1eedb8fe,0xc2974eb4ee658828, + 0xce947a3da6a9273e,0x733d226229feea32, + 0x811ccc668829b887,0x806357d5a3f525f, + 0xa163ff802a3426a8,0xca07c2dcb0cf26f7, + 0xc9bcff6034c13052,0xfc89b393dd02f0b5, + 0xfc2c3f3841f17c67,0xbbac2078d443ace2, + 0x9d9ba7832936edc0,0xd54b944b84aa4c0d, + 0xc5029163f384a931,0xa9e795e65d4df11, + 0xf64335bcf065d37d,0x4d4617b5ff4a16d5, + 0x99ea0196163fa42e,0x504bced1bf8e4e45, + 0xc06481fb9bcf8d39,0xe45ec2862f71e1d6, + 0xf07da27a82c37088,0x5d767327bb4e5a4c, + 0x964e858c91ba2655,0x3a6a07f8d510f86f, + 0xbbe226efb628afea,0x890489f70a55368b, + 0xeadab0aba3b2dbe5,0x2b45ac74ccea842e, + 0x92c8ae6b464fc96f,0x3b0b8bc90012929d, + 
0xb77ada0617e3bbcb,0x9ce6ebb40173744, + 0xe55990879ddcaabd,0xcc420a6a101d0515, + 0x8f57fa54c2a9eab6,0x9fa946824a12232d, + 0xb32df8e9f3546564,0x47939822dc96abf9, + 0xdff9772470297ebd,0x59787e2b93bc56f7, + 0x8bfbea76c619ef36,0x57eb4edb3c55b65a, + 0xaefae51477a06b03,0xede622920b6b23f1, + 0xdab99e59958885c4,0xe95fab368e45eced, + 0x88b402f7fd75539b,0x11dbcb0218ebb414, + 0xaae103b5fcd2a881,0xd652bdc29f26a119, + 0xd59944a37c0752a2,0x4be76d3346f0495f, + 0x857fcae62d8493a5,0x6f70a4400c562ddb, + 0xa6dfbd9fb8e5b88e,0xcb4ccd500f6bb952, + 0xd097ad07a71f26b2,0x7e2000a41346a7a7, + 0x825ecc24c873782f,0x8ed400668c0c28c8, + 0xa2f67f2dfa90563b,0x728900802f0f32fa, + 0xcbb41ef979346bca,0x4f2b40a03ad2ffb9, + 0xfea126b7d78186bc,0xe2f610c84987bfa8, + 0x9f24b832e6b0f436,0xdd9ca7d2df4d7c9, + 0xc6ede63fa05d3143,0x91503d1c79720dbb, + 0xf8a95fcf88747d94,0x75a44c6397ce912a, + 0x9b69dbe1b548ce7c,0xc986afbe3ee11aba, + 0xc24452da229b021b,0xfbe85badce996168, + 0xf2d56790ab41c2a2,0xfae27299423fb9c3, + 0x97c560ba6b0919a5,0xdccd879fc967d41a, + 0xbdb6b8e905cb600f,0x5400e987bbc1c920, + 0xed246723473e3813,0x290123e9aab23b68, + 0x9436c0760c86e30b,0xf9a0b6720aaf6521, + 0xb94470938fa89bce,0xf808e40e8d5b3e69, + 0xe7958cb87392c2c2,0xb60b1d1230b20e04, + 0x90bd77f3483bb9b9,0xb1c6f22b5e6f48c2, + 0xb4ecd5f01a4aa828,0x1e38aeb6360b1af3, + 0xe2280b6c20dd5232,0x25c6da63c38de1b0, + 0x8d590723948a535f,0x579c487e5a38ad0e, + 0xb0af48ec79ace837,0x2d835a9df0c6d851, + 0xdcdb1b2798182244,0xf8e431456cf88e65, + 0x8a08f0f8bf0f156b,0x1b8e9ecb641b58ff, + 0xac8b2d36eed2dac5,0xe272467e3d222f3f, + 0xd7adf884aa879177,0x5b0ed81dcc6abb0f, + 0x86ccbb52ea94baea,0x98e947129fc2b4e9, + 0xa87fea27a539e9a5,0x3f2398d747b36224, + 0xd29fe4b18e88640e,0x8eec7f0d19a03aad, + 0x83a3eeeef9153e89,0x1953cf68300424ac, + 0xa48ceaaab75a8e2b,0x5fa8c3423c052dd7, + 0xcdb02555653131b6,0x3792f412cb06794d, + 0x808e17555f3ebf11,0xe2bbd88bbee40bd0, + 0xa0b19d2ab70e6ed6,0x5b6aceaeae9d0ec4, + 0xc8de047564d20a8b,0xf245825a5a445275, + 
0xfb158592be068d2e,0xeed6e2f0f0d56712, + 0x9ced737bb6c4183d,0x55464dd69685606b, + 0xc428d05aa4751e4c,0xaa97e14c3c26b886, + 0xf53304714d9265df,0xd53dd99f4b3066a8, + 0x993fe2c6d07b7fab,0xe546a8038efe4029, + 0xbf8fdb78849a5f96,0xde98520472bdd033, + 0xef73d256a5c0f77c,0x963e66858f6d4440, + 0x95a8637627989aad,0xdde7001379a44aa8, + 0xbb127c53b17ec159,0x5560c018580d5d52, + 0xe9d71b689dde71af,0xaab8f01e6e10b4a6, + 0x9226712162ab070d,0xcab3961304ca70e8, + 0xb6b00d69bb55c8d1,0x3d607b97c5fd0d22, + 0xe45c10c42a2b3b05,0x8cb89a7db77c506a, + 0x8eb98a7a9a5b04e3,0x77f3608e92adb242, + 0xb267ed1940f1c61c,0x55f038b237591ed3, + 0xdf01e85f912e37a3,0x6b6c46dec52f6688, + 0x8b61313bbabce2c6,0x2323ac4b3b3da015, + 0xae397d8aa96c1b77,0xabec975e0a0d081a, + 0xd9c7dced53c72255,0x96e7bd358c904a21, + 0x881cea14545c7575,0x7e50d64177da2e54, + 0xaa242499697392d2,0xdde50bd1d5d0b9e9, + 0xd4ad2dbfc3d07787,0x955e4ec64b44e864, + 0x84ec3c97da624ab4,0xbd5af13bef0b113e, + 0xa6274bbdd0fadd61,0xecb1ad8aeacdd58e, + 0xcfb11ead453994ba,0x67de18eda5814af2, + 0x81ceb32c4b43fcf4,0x80eacf948770ced7, + 0xa2425ff75e14fc31,0xa1258379a94d028d, + 0xcad2f7f5359a3b3e,0x96ee45813a04330, + 0xfd87b5f28300ca0d,0x8bca9d6e188853fc, + 0x9e74d1b791e07e48,0x775ea264cf55347e, + 0xc612062576589dda,0x95364afe032a81a0, + 0xf79687aed3eec551,0x3a83ddbd83f52210, + 0x9abe14cd44753b52,0xc4926a9672793580, + 0xc16d9a0095928a27,0x75b7053c0f178400, + 0xf1c90080baf72cb1,0x5324c68b12dd6800, + 0x971da05074da7bee,0xd3f6fc16ebca8000, + 0xbce5086492111aea,0x88f4bb1ca6bd0000, + 0xec1e4a7db69561a5,0x2b31e9e3d0700000, + 0x9392ee8e921d5d07,0x3aff322e62600000, + 0xb877aa3236a4b449,0x9befeb9fad487c3, + 0xe69594bec44de15b,0x4c2ebe687989a9b4, + 0x901d7cf73ab0acd9,0xf9d37014bf60a11, + 0xb424dc35095cd80f,0x538484c19ef38c95, + 0xe12e13424bb40e13,0x2865a5f206b06fba, + 0x8cbccc096f5088cb,0xf93f87b7442e45d4, + 0xafebff0bcb24aafe,0xf78f69a51539d749, + 0xdbe6fecebdedd5be,0xb573440e5a884d1c, + 0x89705f4136b4a597,0x31680a88f8953031, + 
0xabcc77118461cefc,0xfdc20d2b36ba7c3e, + 0xd6bf94d5e57a42bc,0x3d32907604691b4d, + 0x8637bd05af6c69b5,0xa63f9a49c2c1b110, + 0xa7c5ac471b478423,0xfcf80dc33721d54, + 0xd1b71758e219652b,0xd3c36113404ea4a9, + 0x83126e978d4fdf3b,0x645a1cac083126ea, + 0xa3d70a3d70a3d70a,0x3d70a3d70a3d70a4, + 0xcccccccccccccccc,0xcccccccccccccccd, + 0x8000000000000000,0x0, + 0xa000000000000000,0x0, + 0xc800000000000000,0x0, + 0xfa00000000000000,0x0, + 0x9c40000000000000,0x0, + 0xc350000000000000,0x0, + 0xf424000000000000,0x0, + 0x9896800000000000,0x0, + 0xbebc200000000000,0x0, + 0xee6b280000000000,0x0, + 0x9502f90000000000,0x0, + 0xba43b74000000000,0x0, + 0xe8d4a51000000000,0x0, + 0x9184e72a00000000,0x0, + 0xb5e620f480000000,0x0, + 0xe35fa931a0000000,0x0, + 0x8e1bc9bf04000000,0x0, + 0xb1a2bc2ec5000000,0x0, + 0xde0b6b3a76400000,0x0, + 0x8ac7230489e80000,0x0, + 0xad78ebc5ac620000,0x0, + 0xd8d726b7177a8000,0x0, + 0x878678326eac9000,0x0, + 0xa968163f0a57b400,0x0, + 0xd3c21bcecceda100,0x0, + 0x84595161401484a0,0x0, + 0xa56fa5b99019a5c8,0x0, + 0xcecb8f27f4200f3a,0x0, + 0x813f3978f8940984,0x4000000000000000, + 0xa18f07d736b90be5,0x5000000000000000, + 0xc9f2c9cd04674ede,0xa400000000000000, + 0xfc6f7c4045812296,0x4d00000000000000, + 0x9dc5ada82b70b59d,0xf020000000000000, + 0xc5371912364ce305,0x6c28000000000000, + 0xf684df56c3e01bc6,0xc732000000000000, + 0x9a130b963a6c115c,0x3c7f400000000000, + 0xc097ce7bc90715b3,0x4b9f100000000000, + 0xf0bdc21abb48db20,0x1e86d40000000000, + 0x96769950b50d88f4,0x1314448000000000, + 0xbc143fa4e250eb31,0x17d955a000000000, + 0xeb194f8e1ae525fd,0x5dcfab0800000000, + 0x92efd1b8d0cf37be,0x5aa1cae500000000, + 0xb7abc627050305ad,0xf14a3d9e40000000, + 0xe596b7b0c643c719,0x6d9ccd05d0000000, + 0x8f7e32ce7bea5c6f,0xe4820023a2000000, + 0xb35dbf821ae4f38b,0xdda2802c8a800000, + 0xe0352f62a19e306e,0xd50b2037ad200000, + 0x8c213d9da502de45,0x4526f422cc340000, + 0xaf298d050e4395d6,0x9670b12b7f410000, + 0xdaf3f04651d47b4c,0x3c0cdd765f114000, + 0x88d8762bf324cd0f,0xa5880a69fb6ac800, + 
0xab0e93b6efee0053,0x8eea0d047a457a00, + 0xd5d238a4abe98068,0x72a4904598d6d880, + 0x85a36366eb71f041,0x47a6da2b7f864750, + 0xa70c3c40a64e6c51,0x999090b65f67d924, + 0xd0cf4b50cfe20765,0xfff4b4e3f741cf6d, + 0x82818f1281ed449f,0xbff8f10e7a8921a4, + 0xa321f2d7226895c7,0xaff72d52192b6a0d, + 0xcbea6f8ceb02bb39,0x9bf4f8a69f764490, + 0xfee50b7025c36a08,0x2f236d04753d5b4, + 0x9f4f2726179a2245,0x1d762422c946590, + 0xc722f0ef9d80aad6,0x424d3ad2b7b97ef5, + 0xf8ebad2b84e0d58b,0xd2e0898765a7deb2, + 0x9b934c3b330c8577,0x63cc55f49f88eb2f, + 0xc2781f49ffcfa6d5,0x3cbf6b71c76b25fb, + 0xf316271c7fc3908a,0x8bef464e3945ef7a, + 0x97edd871cfda3a56,0x97758bf0e3cbb5ac, + 0xbde94e8e43d0c8ec,0x3d52eeed1cbea317, + 0xed63a231d4c4fb27,0x4ca7aaa863ee4bdd, + 0x945e455f24fb1cf8,0x8fe8caa93e74ef6a, + 0xb975d6b6ee39e436,0xb3e2fd538e122b44, + 0xe7d34c64a9c85d44,0x60dbbca87196b616, + 0x90e40fbeea1d3a4a,0xbc8955e946fe31cd, + 0xb51d13aea4a488dd,0x6babab6398bdbe41, + 0xe264589a4dcdab14,0xc696963c7eed2dd1, + 0x8d7eb76070a08aec,0xfc1e1de5cf543ca2, + 0xb0de65388cc8ada8,0x3b25a55f43294bcb, + 0xdd15fe86affad912,0x49ef0eb713f39ebe, + 0x8a2dbf142dfcc7ab,0x6e3569326c784337, + 0xacb92ed9397bf996,0x49c2c37f07965404, + 0xd7e77a8f87daf7fb,0xdc33745ec97be906, + 0x86f0ac99b4e8dafd,0x69a028bb3ded71a3, + 0xa8acd7c0222311bc,0xc40832ea0d68ce0c, + 0xd2d80db02aabd62b,0xf50a3fa490c30190, + 0x83c7088e1aab65db,0x792667c6da79e0fa, + 0xa4b8cab1a1563f52,0x577001b891185938, + 0xcde6fd5e09abcf26,0xed4c0226b55e6f86, + 0x80b05e5ac60b6178,0x544f8158315b05b4, + 0xa0dc75f1778e39d6,0x696361ae3db1c721, + 0xc913936dd571c84c,0x3bc3a19cd1e38e9, + 0xfb5878494ace3a5f,0x4ab48a04065c723, + 0x9d174b2dcec0e47b,0x62eb0d64283f9c76, + 0xc45d1df942711d9a,0x3ba5d0bd324f8394, + 0xf5746577930d6500,0xca8f44ec7ee36479, + 0x9968bf6abbe85f20,0x7e998b13cf4e1ecb, + 0xbfc2ef456ae276e8,0x9e3fedd8c321a67e, + 0xefb3ab16c59b14a2,0xc5cfe94ef3ea101e, + 0x95d04aee3b80ece5,0xbba1f1d158724a12, + 0xbb445da9ca61281f,0x2a8a6e45ae8edc97, + 
0xea1575143cf97226,0xf52d09d71a3293bd, + 0x924d692ca61be758,0x593c2626705f9c56, + 0xb6e0c377cfa2e12e,0x6f8b2fb00c77836c, + 0xe498f455c38b997a,0xb6dfb9c0f956447, + 0x8edf98b59a373fec,0x4724bd4189bd5eac, + 0xb2977ee300c50fe7,0x58edec91ec2cb657, + 0xdf3d5e9bc0f653e1,0x2f2967b66737e3ed, + 0x8b865b215899f46c,0xbd79e0d20082ee74, + 0xae67f1e9aec07187,0xecd8590680a3aa11, + 0xda01ee641a708de9,0xe80e6f4820cc9495, + 0x884134fe908658b2,0x3109058d147fdcdd, + 0xaa51823e34a7eede,0xbd4b46f0599fd415, + 0xd4e5e2cdc1d1ea96,0x6c9e18ac7007c91a, + 0x850fadc09923329e,0x3e2cf6bc604ddb0, + 0xa6539930bf6bff45,0x84db8346b786151c, + 0xcfe87f7cef46ff16,0xe612641865679a63, + 0x81f14fae158c5f6e,0x4fcb7e8f3f60c07e, + 0xa26da3999aef7749,0xe3be5e330f38f09d, + 0xcb090c8001ab551c,0x5cadf5bfd3072cc5, + 0xfdcb4fa002162a63,0x73d9732fc7c8f7f6, + 0x9e9f11c4014dda7e,0x2867e7fddcdd9afa, + 0xc646d63501a1511d,0xb281e1fd541501b8, + 0xf7d88bc24209a565,0x1f225a7ca91a4226, + 0x9ae757596946075f,0x3375788de9b06958, + 0xc1a12d2fc3978937,0x52d6b1641c83ae, + 0xf209787bb47d6b84,0xc0678c5dbd23a49a, + 0x9745eb4d50ce6332,0xf840b7ba963646e0, + 0xbd176620a501fbff,0xb650e5a93bc3d898, + 0xec5d3fa8ce427aff,0xa3e51f138ab4cebe, + 0x93ba47c980e98cdf,0xc66f336c36b10137, + 0xb8a8d9bbe123f017,0xb80b0047445d4184, + 0xe6d3102ad96cec1d,0xa60dc059157491e5, + 0x9043ea1ac7e41392,0x87c89837ad68db2f, + 0xb454e4a179dd1877,0x29babe4598c311fb, + 0xe16a1dc9d8545e94,0xf4296dd6fef3d67a, + 0x8ce2529e2734bb1d,0x1899e4a65f58660c, + 0xb01ae745b101e9e4,0x5ec05dcff72e7f8f, + 0xdc21a1171d42645d,0x76707543f4fa1f73, + 0x899504ae72497eba,0x6a06494a791c53a8, + 0xabfa45da0edbde69,0x487db9d17636892, + 0xd6f8d7509292d603,0x45a9d2845d3c42b6, + 0x865b86925b9bc5c2,0xb8a2392ba45a9b2, + 0xa7f26836f282b732,0x8e6cac7768d7141e, + 0xd1ef0244af2364ff,0x3207d795430cd926, + 0x8335616aed761f1f,0x7f44e6bd49e807b8, + 0xa402b9c5a8d3a6e7,0x5f16206c9c6209a6, + 0xcd036837130890a1,0x36dba887c37a8c0f, + 0x802221226be55a64,0xc2494954da2c9789, + 
0xa02aa96b06deb0fd,0xf2db9baa10b7bd6c, + 0xc83553c5c8965d3d,0x6f92829494e5acc7, + 0xfa42a8b73abbf48c,0xcb772339ba1f17f9, + 0x9c69a97284b578d7,0xff2a760414536efb, + 0xc38413cf25e2d70d,0xfef5138519684aba, + 0xf46518c2ef5b8cd1,0x7eb258665fc25d69, + 0x98bf2f79d5993802,0xef2f773ffbd97a61, + 0xbeeefb584aff8603,0xaafb550ffacfd8fa, + 0xeeaaba2e5dbf6784,0x95ba2a53f983cf38, + 0x952ab45cfa97a0b2,0xdd945a747bf26183, + 0xba756174393d88df,0x94f971119aeef9e4, + 0xe912b9d1478ceb17,0x7a37cd5601aab85d, + 0x91abb422ccb812ee,0xac62e055c10ab33a, + 0xb616a12b7fe617aa,0x577b986b314d6009, + 0xe39c49765fdf9d94,0xed5a7e85fda0b80b, + 0x8e41ade9fbebc27d,0x14588f13be847307, + 0xb1d219647ae6b31c,0x596eb2d8ae258fc8, + 0xde469fbd99a05fe3,0x6fca5f8ed9aef3bb, + 0x8aec23d680043bee,0x25de7bb9480d5854, + 0xada72ccc20054ae9,0xaf561aa79a10ae6a, + 0xd910f7ff28069da4,0x1b2ba1518094da04, + 0x87aa9aff79042286,0x90fb44d2f05d0842, + 0xa99541bf57452b28,0x353a1607ac744a53, + 0xd3fa922f2d1675f2,0x42889b8997915ce8, + 0x847c9b5d7c2e09b7,0x69956135febada11, + 0xa59bc234db398c25,0x43fab9837e699095, + 0xcf02b2c21207ef2e,0x94f967e45e03f4bb, + 0x8161afb94b44f57d,0x1d1be0eebac278f5, + 0xa1ba1ba79e1632dc,0x6462d92a69731732, + 0xca28a291859bbf93,0x7d7b8f7503cfdcfe, + 0xfcb2cb35e702af78,0x5cda735244c3d43e, + 0x9defbf01b061adab,0x3a0888136afa64a7, + 0xc56baec21c7a1916,0x88aaa1845b8fdd0, + 0xf6c69a72a3989f5b,0x8aad549e57273d45, + 0x9a3c2087a63f6399,0x36ac54e2f678864b, + 0xc0cb28a98fcf3c7f,0x84576a1bb416a7dd, + 0xf0fdf2d3f3c30b9f,0x656d44a2a11c51d5, + 0x969eb7c47859e743,0x9f644ae5a4b1b325, + 0xbc4665b596706114,0x873d5d9f0dde1fee, + 0xeb57ff22fc0c7959,0xa90cb506d155a7ea, + 0x9316ff75dd87cbd8,0x9a7f12442d588f2, + 0xb7dcbf5354e9bece,0xc11ed6d538aeb2f, + 0xe5d3ef282a242e81,0x8f1668c8a86da5fa, + 0x8fa475791a569d10,0xf96e017d694487bc, + 0xb38d92d760ec4455,0x37c981dcc395a9ac, + 0xe070f78d3927556a,0x85bbe253f47b1417, + 0x8c469ab843b89562,0x93956d7478ccec8e, + 0xaf58416654a6babb,0x387ac8d1970027b2, + 
0xdb2e51bfe9d0696a,0x6997b05fcc0319e, + 0x88fcf317f22241e2,0x441fece3bdf81f03, + 0xab3c2fddeeaad25a,0xd527e81cad7626c3, + 0xd60b3bd56a5586f1,0x8a71e223d8d3b074, + 0x85c7056562757456,0xf6872d5667844e49, + 0xa738c6bebb12d16c,0xb428f8ac016561db, + 0xd106f86e69d785c7,0xe13336d701beba52, + 0x82a45b450226b39c,0xecc0024661173473, + 0xa34d721642b06084,0x27f002d7f95d0190, + 0xcc20ce9bd35c78a5,0x31ec038df7b441f4, + 0xff290242c83396ce,0x7e67047175a15271, + 0x9f79a169bd203e41,0xf0062c6e984d386, + 0xc75809c42c684dd1,0x52c07b78a3e60868, + 0xf92e0c3537826145,0xa7709a56ccdf8a82, + 0x9bbcc7a142b17ccb,0x88a66076400bb691, + 0xc2abf989935ddbfe,0x6acff893d00ea435, + 0xf356f7ebf83552fe,0x583f6b8c4124d43, + 0x98165af37b2153de,0xc3727a337a8b704a, + 0xbe1bf1b059e9a8d6,0x744f18c0592e4c5c, + 0xeda2ee1c7064130c,0x1162def06f79df73, + 0x9485d4d1c63e8be7,0x8addcb5645ac2ba8, + 0xb9a74a0637ce2ee1,0x6d953e2bd7173692, + 0xe8111c87c5c1ba99,0xc8fa8db6ccdd0437, + 0x910ab1d4db9914a0,0x1d9c9892400a22a2, + 0xb54d5e4a127f59c8,0x2503beb6d00cab4b, + 0xe2a0b5dc971f303a,0x2e44ae64840fd61d, + 0x8da471a9de737e24,0x5ceaecfed289e5d2, + 0xb10d8e1456105dad,0x7425a83e872c5f47, + 0xdd50f1996b947518,0xd12f124e28f77719, + 0x8a5296ffe33cc92f,0x82bd6b70d99aaa6f, + 0xace73cbfdc0bfb7b,0x636cc64d1001550b, + 0xd8210befd30efa5a,0x3c47f7e05401aa4e, + 0x8714a775e3e95c78,0x65acfaec34810a71, + 0xa8d9d1535ce3b396,0x7f1839a741a14d0d, + 0xd31045a8341ca07c,0x1ede48111209a050, + 0x83ea2b892091e44d,0x934aed0aab460432, + 0xa4e4b66b68b65d60,0xf81da84d5617853f, + 0xce1de40642e3f4b9,0x36251260ab9d668e, + 0x80d2ae83e9ce78f3,0xc1d72b7c6b426019, + 0xa1075a24e4421730,0xb24cf65b8612f81f, + 0xc94930ae1d529cfc,0xdee033f26797b627, + 0xfb9b7cd9a4a7443c,0x169840ef017da3b1, + 0x9d412e0806e88aa5,0x8e1f289560ee864e, + 0xc491798a08a2ad4e,0xf1a6f2bab92a27e2, + 0xf5b5d7ec8acb58a2,0xae10af696774b1db, + 0x9991a6f3d6bf1765,0xacca6da1e0a8ef29, + 0xbff610b0cc6edd3f,0x17fd090a58d32af3, + 0xeff394dcff8a948e,0xddfc4b4cef07f5b0, + 
0x95f83d0a1fb69cd9,0x4abdaf101564f98e, + 0xbb764c4ca7a4440f,0x9d6d1ad41abe37f1, + 0xea53df5fd18d5513,0x84c86189216dc5ed, + 0x92746b9be2f8552c,0x32fd3cf5b4e49bb4, + 0xb7118682dbb66a77,0x3fbc8c33221dc2a1, + 0xe4d5e82392a40515,0xfabaf3feaa5334a, + 0x8f05b1163ba6832d,0x29cb4d87f2a7400e, + 0xb2c71d5bca9023f8,0x743e20e9ef511012, + 0xdf78e4b2bd342cf6,0x914da9246b255416, + 0x8bab8eefb6409c1a,0x1ad089b6c2f7548e, + 0xae9672aba3d0c320,0xa184ac2473b529b1, + 0xda3c0f568cc4f3e8,0xc9e5d72d90a2741e, + 0x8865899617fb1871,0x7e2fa67c7a658892, + 0xaa7eebfb9df9de8d,0xddbb901b98feeab7, + 0xd51ea6fa85785631,0x552a74227f3ea565, + 0x8533285c936b35de,0xd53a88958f87275f, + 0xa67ff273b8460356,0x8a892abaf368f137, + 0xd01fef10a657842c,0x2d2b7569b0432d85, + 0x8213f56a67f6b29b,0x9c3b29620e29fc73, + 0xa298f2c501f45f42,0x8349f3ba91b47b8f, + 0xcb3f2f7642717713,0x241c70a936219a73, + 0xfe0efb53d30dd4d7,0xed238cd383aa0110, + 0x9ec95d1463e8a506,0xf4363804324a40aa, + 0xc67bb4597ce2ce48,0xb143c6053edcd0d5, + 0xf81aa16fdc1b81da,0xdd94b7868e94050a, + 0x9b10a4e5e9913128,0xca7cf2b4191c8326, + 0xc1d4ce1f63f57d72,0xfd1c2f611f63a3f0, + 0xf24a01a73cf2dccf,0xbc633b39673c8cec, + 0x976e41088617ca01,0xd5be0503e085d813, + 0xbd49d14aa79dbc82,0x4b2d8644d8a74e18, + 0xec9c459d51852ba2,0xddf8e7d60ed1219e, + 0x93e1ab8252f33b45,0xcabb90e5c942b503, + 0xb8da1662e7b00a17,0x3d6a751f3b936243, + 0xe7109bfba19c0c9d,0xcc512670a783ad4, + 0x906a617d450187e2,0x27fb2b80668b24c5, + 0xb484f9dc9641e9da,0xb1f9f660802dedf6, + 0xe1a63853bbd26451,0x5e7873f8a0396973, + 0x8d07e33455637eb2,0xdb0b487b6423e1e8, + 0xb049dc016abc5e5f,0x91ce1a9a3d2cda62, + 0xdc5c5301c56b75f7,0x7641a140cc7810fb, + 0x89b9b3e11b6329ba,0xa9e904c87fcb0a9d, + 0xac2820d9623bf429,0x546345fa9fbdcd44, + 0xd732290fbacaf133,0xa97c177947ad4095, + 0x867f59a9d4bed6c0,0x49ed8eabcccc485d, + 0xa81f301449ee8c70,0x5c68f256bfff5a74, + 0xd226fc195c6a2f8c,0x73832eec6fff3111, + 0x83585d8fd9c25db7,0xc831fd53c5ff7eab, + 0xa42e74f3d032f525,0xba3e7ca8b77f5e55, + 
0xcd3a1230c43fb26f,0x28ce1bd2e55f35eb, + 0x80444b5e7aa7cf85,0x7980d163cf5b81b3, + 0xa0555e361951c366,0xd7e105bcc332621f, + 0xc86ab5c39fa63440,0x8dd9472bf3fefaa7, + 0xfa856334878fc150,0xb14f98f6f0feb951, + 0x9c935e00d4b9d8d2,0x6ed1bf9a569f33d3, + 0xc3b8358109e84f07,0xa862f80ec4700c8, + 0xf4a642e14c6262c8,0xcd27bb612758c0fa, + 0x98e7e9cccfbd7dbd,0x8038d51cb897789c, + 0xbf21e44003acdd2c,0xe0470a63e6bd56c3, + 0xeeea5d5004981478,0x1858ccfce06cac74, + 0x95527a5202df0ccb,0xf37801e0c43ebc8, + 0xbaa718e68396cffd,0xd30560258f54e6ba, + 0xe950df20247c83fd,0x47c6b82ef32a2069, + 0x91d28b7416cdd27e,0x4cdc331d57fa5441, + 0xb6472e511c81471d,0xe0133fe4adf8e952, + 0xe3d8f9e563a198e5,0x58180fddd97723a6, + 0x8e679c2f5e44ff8f,0x570f09eaa7ea7648,}; + +#endif // SIMDJSON_SRC_NUMBERPARSING_TABLES_CPP +/* end file internal/numberparsing_tables.cpp */ +/* including internal/simdprune_tables.cpp: #include */ +/* begin file internal/simdprune_tables.cpp */ +#ifndef SIMDJSON_SRC_SIMDPRUNE_TABLES_CPP +#define SIMDJSON_SRC_SIMDPRUNE_TABLES_CPP + +/* including simdjson/implementation_detection.h: #include */ +/* begin file simdjson/implementation_detection.h */ +#ifndef SIMDJSON_IMPLEMENTATION_DETECTION_H +#define SIMDJSON_IMPLEMENTATION_DETECTION_H + +/* skipped duplicate #include "simdjson/base.h" */ + +// 0 is reserved, because undefined SIMDJSON_IMPLEMENTATION equals 0 in preprocessor macros. 
+#define SIMDJSON_IMPLEMENTATION_ID_arm64 1 +#define SIMDJSON_IMPLEMENTATION_ID_fallback 2 +#define SIMDJSON_IMPLEMENTATION_ID_haswell 3 +#define SIMDJSON_IMPLEMENTATION_ID_icelake 4 +#define SIMDJSON_IMPLEMENTATION_ID_ppc64 5 +#define SIMDJSON_IMPLEMENTATION_ID_westmere 6 +#define SIMDJSON_IMPLEMENTATION_ID_lsx 7 +#define SIMDJSON_IMPLEMENTATION_ID_lasx 8 + +#define SIMDJSON_IMPLEMENTATION_ID_FOR(IMPL) SIMDJSON_CAT(SIMDJSON_IMPLEMENTATION_ID_, IMPL) +#define SIMDJSON_IMPLEMENTATION_ID SIMDJSON_IMPLEMENTATION_ID_FOR(SIMDJSON_IMPLEMENTATION) + +#define SIMDJSON_IMPLEMENTATION_IS(IMPL) SIMDJSON_IMPLEMENTATION_ID == SIMDJSON_IMPLEMENTATION_ID_FOR(IMPL) + +// +// First, figure out which implementations can be run. Doing it here makes it so we don't have to worry about the order +// in which we include them. +// + +// Default ARM64 to on if this is ARM64. +#ifndef SIMDJSON_IMPLEMENTATION_ARM64 +#define SIMDJSON_IMPLEMENTATION_ARM64 (SIMDJSON_IS_ARM64) +#endif +#if SIMDJSON_IMPLEMENTATION_ARM64 && SIMDJSON_IS_ARM64 +#define SIMDJSON_CAN_ALWAYS_RUN_ARM64 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_ARM64 0 +#endif + +// Default Icelake to on if this is x86-64, AVX-512 is allowed and the compiler supports VBMI2. Even if we're not compiled for it, it could be selected +// at runtime.
+#ifndef SIMDJSON_IMPLEMENTATION_ICELAKE +#define SIMDJSON_IMPLEMENTATION_ICELAKE ((SIMDJSON_IS_X86_64) && (SIMDJSON_AVX512_ALLOWED) && (SIMDJSON_COMPILER_SUPPORTS_VBMI2)) +#endif + +#ifdef _MSC_VER +// To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see +// https://github.com/simdjson/simdjson/issues/1247 +#if ((SIMDJSON_IMPLEMENTATION_ICELAKE) && (__AVX2__) && (__AVX512F__) && (__AVX512DQ__) && (__AVX512CD__) && (__AVX512BW__) && (__AVX512VL__) && (__AVX512VBMI2__)) +#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE 0 +#endif + +#else +// Other compilers: (__BMI__), (__PCLMUL__) and (__LZCNT__) are required as well. +#if ((SIMDJSON_IMPLEMENTATION_ICELAKE) && (__AVX2__) && (__BMI__) && (__PCLMUL__) && (__LZCNT__) && (__AVX512F__) && (__AVX512DQ__) && (__AVX512CD__) && (__AVX512BW__) && (__AVX512VL__) && (__AVX512VBMI2__)) +#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE 0 +#endif + +#endif + +// Default Haswell to on if this is x86-64. Even if we're not compiled for it, it could be selected +// at runtime. +#ifndef SIMDJSON_IMPLEMENTATION_HASWELL +#if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +// if icelake is always available, never enable haswell. +#define SIMDJSON_IMPLEMENTATION_HASWELL 0 +#else +#define SIMDJSON_IMPLEMENTATION_HASWELL SIMDJSON_IS_X86_64 +#endif +#endif +#ifdef _MSC_VER +// To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see +// https://github.com/simdjson/simdjson/issues/1247 +#if ((SIMDJSON_IMPLEMENTATION_HASWELL) && (SIMDJSON_IS_X86_64) && (__AVX2__)) +#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL 0 +#endif + +#else +// Other compilers: (__BMI__), (__PCLMUL__) and (__LZCNT__) are required as well. +#if ((SIMDJSON_IMPLEMENTATION_HASWELL) && (SIMDJSON_IS_X86_64) && (__AVX2__) && (__BMI__) && (__PCLMUL__) && (__LZCNT__)) +#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL 0 +#endif + +#endif + +// Default Westmere to on if this is x86-64.
+#ifndef SIMDJSON_IMPLEMENTATION_WESTMERE +#if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE || SIMDJSON_CAN_ALWAYS_RUN_HASWELL +// if icelake or haswell are always available, never enable westmere. +#define SIMDJSON_IMPLEMENTATION_WESTMERE 0 +#else +#define SIMDJSON_IMPLEMENTATION_WESTMERE SIMDJSON_IS_X86_64 +#endif +#endif + +#if (SIMDJSON_IMPLEMENTATION_WESTMERE && SIMDJSON_IS_X86_64 && __SSE4_2__ && __PCLMUL__) +#define SIMDJSON_CAN_ALWAYS_RUN_WESTMERE 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_WESTMERE 0 +#endif + +// Default PPC64 to on if this is PPC64 with VMX (SIMDJSON_IS_PPC64_VMX). +#ifndef SIMDJSON_IMPLEMENTATION_PPC64 +#define SIMDJSON_IMPLEMENTATION_PPC64 (SIMDJSON_IS_PPC64 && SIMDJSON_IS_PPC64_VMX) +#endif +#if SIMDJSON_IMPLEMENTATION_PPC64 && SIMDJSON_IS_PPC64 && SIMDJSON_IS_PPC64_VMX +#define SIMDJSON_CAN_ALWAYS_RUN_PPC64 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_PPC64 0 +#endif + +// Default LASX to on if this is LoongArch64 and __loongarch_asx is set. +#ifndef SIMDJSON_IMPLEMENTATION_LASX +#define SIMDJSON_IMPLEMENTATION_LASX (SIMDJSON_IS_LOONGARCH64 && __loongarch_asx) +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_LASX (SIMDJSON_IMPLEMENTATION_LASX) + +// Default LSX to on if this is LoongArch64 and __loongarch_sx is set, unless LASX can always run. +#ifndef SIMDJSON_IMPLEMENTATION_LSX +#if SIMDJSON_CAN_ALWAYS_RUN_LASX +#define SIMDJSON_IMPLEMENTATION_LSX 0 +#else +#define SIMDJSON_IMPLEMENTATION_LSX (SIMDJSON_IS_LOONGARCH64 && __loongarch_sx) +#endif +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_LSX (SIMDJSON_IMPLEMENTATION_LSX) + +// Default Fallback to on unless a builtin implementation has already been selected. +#ifndef SIMDJSON_IMPLEMENTATION_FALLBACK +#if SIMDJSON_CAN_ALWAYS_RUN_ARM64 || SIMDJSON_CAN_ALWAYS_RUN_ICELAKE || SIMDJSON_CAN_ALWAYS_RUN_HASWELL || SIMDJSON_CAN_ALWAYS_RUN_WESTMERE || SIMDJSON_CAN_ALWAYS_RUN_PPC64 || SIMDJSON_CAN_ALWAYS_RUN_LSX || SIMDJSON_CAN_ALWAYS_RUN_LASX +// if anything at all except fallback can always run, then disable fallback.
+#define SIMDJSON_IMPLEMENTATION_FALLBACK 0 +#else +#define SIMDJSON_IMPLEMENTATION_FALLBACK 1 +#endif +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_FALLBACK SIMDJSON_IMPLEMENTATION_FALLBACK + +// Determine the best builtin implementation: unless SIMDJSON_BUILTIN_IMPLEMENTATION is already defined, the first always-runnable entry in the order below wins. +#ifndef SIMDJSON_BUILTIN_IMPLEMENTATION + +#if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +#define SIMDJSON_BUILTIN_IMPLEMENTATION icelake +#elif SIMDJSON_CAN_ALWAYS_RUN_HASWELL +#define SIMDJSON_BUILTIN_IMPLEMENTATION haswell +#elif SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +#define SIMDJSON_BUILTIN_IMPLEMENTATION westmere +#elif SIMDJSON_CAN_ALWAYS_RUN_ARM64 +#define SIMDJSON_BUILTIN_IMPLEMENTATION arm64 +#elif SIMDJSON_CAN_ALWAYS_RUN_PPC64 +#define SIMDJSON_BUILTIN_IMPLEMENTATION ppc64 +#elif SIMDJSON_CAN_ALWAYS_RUN_LSX +#define SIMDJSON_BUILTIN_IMPLEMENTATION lsx +#elif SIMDJSON_CAN_ALWAYS_RUN_LASX +#define SIMDJSON_BUILTIN_IMPLEMENTATION lasx +#elif SIMDJSON_CAN_ALWAYS_RUN_FALLBACK +#define SIMDJSON_BUILTIN_IMPLEMENTATION fallback +#else +#error "All possible implementations (including fallback) have been disabled! simdjson will not run."
+#endif + +#endif // SIMDJSON_BUILTIN_IMPLEMENTATION + +#define SIMDJSON_BUILTIN_IMPLEMENTATION_ID SIMDJSON_IMPLEMENTATION_ID_FOR(SIMDJSON_BUILTIN_IMPLEMENTATION) +#define SIMDJSON_BUILTIN_IMPLEMENTATION_IS(IMPL) SIMDJSON_BUILTIN_IMPLEMENTATION_ID == SIMDJSON_IMPLEMENTATION_ID_FOR(IMPL) + +#endif // SIMDJSON_IMPLEMENTATION_DETECTION_H +/* end file simdjson/implementation_detection.h */ + +#if SIMDJSON_IMPLEMENTATION_ARM64 || SIMDJSON_IMPLEMENTATION_ICELAKE || SIMDJSON_IMPLEMENTATION_HASWELL || SIMDJSON_IMPLEMENTATION_WESTMERE || SIMDJSON_IMPLEMENTATION_PPC64 || SIMDJSON_IMPLEMENTATION_LSX || SIMDJSON_IMPLEMENTATION_LASX + +#include + +namespace simdjson { // table modified and copied from +namespace internal { // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetTable +SIMDJSON_DLLIMPORTEXPORT const unsigned char BitsSetTable256mul2[256] = { + 0, 2, 2, 4, 2, 4, 4, 6, 2, 4, 4, 6, 4, 6, 6, 8, 2, 4, 4, + 6, 4, 6, 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 2, 4, 4, 6, 4, 6, + 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, 6, + 8, 8, 10, 8, 10, 10, 12, 2, 4, 4, 6, 4, 6, 6, 8, 4, 6, 6, 8, + 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, + 12, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, 12, 6, 8, + 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 2, 4, 4, 6, 4, + 6, 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, + 6, 8, 8, 10, 8, 10, 10, 12, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, + 10, 8, 10, 10, 12, 6, 8, 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, + 12, 14, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, 12, 6, + 8, 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 6, 8, 8, 10, + 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 8, 10, 10, 12, 10, 12, 12, + 14, 10, 12, 12, 14, 12, 14, 14, 16}; + +SIMDJSON_DLLIMPORTEXPORT const uint8_t pshufb_combine_table[272] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, + 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08, + 0x09, 0x0a, 0x0b, 
0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, + 0x0f, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, + 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x08, + 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x01, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, + 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x08, 0x09, 0x0a, 0x0b, + 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, +}; + +// 256 * 8 bytes = 2kB, easily fits in cache. +SIMDJSON_DLLIMPORTEXPORT const uint64_t thintable_epi8[256] = { + 0x0706050403020100, 0x0007060504030201, 0x0007060504030200, + 0x0000070605040302, 0x0007060504030100, 0x0000070605040301, + 0x0000070605040300, 0x0000000706050403, 0x0007060504020100, + 0x0000070605040201, 0x0000070605040200, 0x0000000706050402, + 0x0000070605040100, 0x0000000706050401, 0x0000000706050400, + 0x0000000007060504, 0x0007060503020100, 0x0000070605030201, + 0x0000070605030200, 0x0000000706050302, 0x0000070605030100, + 0x0000000706050301, 0x0000000706050300, 0x0000000007060503, + 0x0000070605020100, 0x0000000706050201, 0x0000000706050200, + 0x0000000007060502, 0x0000000706050100, 0x0000000007060501, + 0x0000000007060500, 0x0000000000070605, 0x0007060403020100, + 0x0000070604030201, 0x0000070604030200, 0x0000000706040302, + 0x0000070604030100, 0x0000000706040301, 0x0000000706040300, + 0x0000000007060403, 0x0000070604020100, 0x0000000706040201, + 0x0000000706040200, 0x0000000007060402, 0x0000000706040100, + 0x0000000007060401, 0x0000000007060400, 0x0000000000070604, + 0x0000070603020100, 0x0000000706030201, 0x0000000706030200, + 0x0000000007060302, 0x0000000706030100, 0x0000000007060301, + 0x0000000007060300, 0x0000000000070603, 
0x0000000706020100, + 0x0000000007060201, 0x0000000007060200, 0x0000000000070602, + 0x0000000007060100, 0x0000000000070601, 0x0000000000070600, + 0x0000000000000706, 0x0007050403020100, 0x0000070504030201, + 0x0000070504030200, 0x0000000705040302, 0x0000070504030100, + 0x0000000705040301, 0x0000000705040300, 0x0000000007050403, + 0x0000070504020100, 0x0000000705040201, 0x0000000705040200, + 0x0000000007050402, 0x0000000705040100, 0x0000000007050401, + 0x0000000007050400, 0x0000000000070504, 0x0000070503020100, + 0x0000000705030201, 0x0000000705030200, 0x0000000007050302, + 0x0000000705030100, 0x0000000007050301, 0x0000000007050300, + 0x0000000000070503, 0x0000000705020100, 0x0000000007050201, + 0x0000000007050200, 0x0000000000070502, 0x0000000007050100, + 0x0000000000070501, 0x0000000000070500, 0x0000000000000705, + 0x0000070403020100, 0x0000000704030201, 0x0000000704030200, + 0x0000000007040302, 0x0000000704030100, 0x0000000007040301, + 0x0000000007040300, 0x0000000000070403, 0x0000000704020100, + 0x0000000007040201, 0x0000000007040200, 0x0000000000070402, + 0x0000000007040100, 0x0000000000070401, 0x0000000000070400, + 0x0000000000000704, 0x0000000703020100, 0x0000000007030201, + 0x0000000007030200, 0x0000000000070302, 0x0000000007030100, + 0x0000000000070301, 0x0000000000070300, 0x0000000000000703, + 0x0000000007020100, 0x0000000000070201, 0x0000000000070200, + 0x0000000000000702, 0x0000000000070100, 0x0000000000000701, + 0x0000000000000700, 0x0000000000000007, 0x0006050403020100, + 0x0000060504030201, 0x0000060504030200, 0x0000000605040302, + 0x0000060504030100, 0x0000000605040301, 0x0000000605040300, + 0x0000000006050403, 0x0000060504020100, 0x0000000605040201, + 0x0000000605040200, 0x0000000006050402, 0x0000000605040100, + 0x0000000006050401, 0x0000000006050400, 0x0000000000060504, + 0x0000060503020100, 0x0000000605030201, 0x0000000605030200, + 0x0000000006050302, 0x0000000605030100, 0x0000000006050301, + 0x0000000006050300, 0x0000000000060503, 
0x0000000605020100, + 0x0000000006050201, 0x0000000006050200, 0x0000000000060502, + 0x0000000006050100, 0x0000000000060501, 0x0000000000060500, + 0x0000000000000605, 0x0000060403020100, 0x0000000604030201, + 0x0000000604030200, 0x0000000006040302, 0x0000000604030100, + 0x0000000006040301, 0x0000000006040300, 0x0000000000060403, + 0x0000000604020100, 0x0000000006040201, 0x0000000006040200, + 0x0000000000060402, 0x0000000006040100, 0x0000000000060401, + 0x0000000000060400, 0x0000000000000604, 0x0000000603020100, + 0x0000000006030201, 0x0000000006030200, 0x0000000000060302, + 0x0000000006030100, 0x0000000000060301, 0x0000000000060300, + 0x0000000000000603, 0x0000000006020100, 0x0000000000060201, + 0x0000000000060200, 0x0000000000000602, 0x0000000000060100, + 0x0000000000000601, 0x0000000000000600, 0x0000000000000006, + 0x0000050403020100, 0x0000000504030201, 0x0000000504030200, + 0x0000000005040302, 0x0000000504030100, 0x0000000005040301, + 0x0000000005040300, 0x0000000000050403, 0x0000000504020100, + 0x0000000005040201, 0x0000000005040200, 0x0000000000050402, + 0x0000000005040100, 0x0000000000050401, 0x0000000000050400, + 0x0000000000000504, 0x0000000503020100, 0x0000000005030201, + 0x0000000005030200, 0x0000000000050302, 0x0000000005030100, + 0x0000000000050301, 0x0000000000050300, 0x0000000000000503, + 0x0000000005020100, 0x0000000000050201, 0x0000000000050200, + 0x0000000000000502, 0x0000000000050100, 0x0000000000000501, + 0x0000000000000500, 0x0000000000000005, 0x0000000403020100, + 0x0000000004030201, 0x0000000004030200, 0x0000000000040302, + 0x0000000004030100, 0x0000000000040301, 0x0000000000040300, + 0x0000000000000403, 0x0000000004020100, 0x0000000000040201, + 0x0000000000040200, 0x0000000000000402, 0x0000000000040100, + 0x0000000000000401, 0x0000000000000400, 0x0000000000000004, + 0x0000000003020100, 0x0000000000030201, 0x0000000000030200, + 0x0000000000000302, 0x0000000000030100, 0x0000000000000301, + 0x0000000000000300, 0x0000000000000003, 
0x0000000000020100, + 0x0000000000000201, 0x0000000000000200, 0x0000000000000002, + 0x0000000000000100, 0x0000000000000001, 0x0000000000000000, + 0x0000000000000000, +}; //static uint64_t thintable_epi8[256] + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_IMPLEMENTATION_ARM64 || SIMDJSON_IMPLEMENTATION_ICELAKE || SIMDJSON_IMPLEMENTATION_HASWELL || SIMDJSON_IMPLEMENTATION_WESTMERE || SIMDJSON_IMPLEMENTATION_PPC64 || SIMDJSON_IMPLEMENTATION_LSX || SIMDJSON_IMPLEMENTATION_LASX + +#endif // SIMDJSON_SRC_SIMDPRUNE_TABLES_CPP +/* end file internal/simdprune_tables.cpp */ + +/* including simdjson/generic/dependencies.h: #include */ +/* begin file simdjson/generic/dependencies.h */ +#ifdef SIMDJSON_CONDITIONAL_INCLUDE +#error simdjson/generic/dependencies.h must be included before defining SIMDJSON_CONDITIONAL_INCLUDE! +#endif + +#ifndef SIMDJSON_GENERIC_DEPENDENCIES_H +#define SIMDJSON_GENERIC_DEPENDENCIES_H + +// Internal headers needed for generics. +// All includes referencing simdjson headers *not* under simdjson/generic must be here! +// Otherwise, amalgamation will fail. 
+/* skipped duplicate #include "simdjson/base.h" */ +/* including simdjson/implementation.h: #include "simdjson/implementation.h" */ +/* begin file simdjson/implementation.h */ +#ifndef SIMDJSON_IMPLEMENTATION_H +#define SIMDJSON_IMPLEMENTATION_H + +/* including simdjson/internal/atomic_ptr.h: #include "simdjson/internal/atomic_ptr.h" */ +/* begin file simdjson/internal/atomic_ptr.h */ +#ifndef SIMDJSON_INTERNAL_ATOMIC_PTR_H +#define SIMDJSON_INTERNAL_ATOMIC_PTR_H + +/* skipped duplicate #include "simdjson/base.h" */ +#include + +namespace simdjson { +namespace internal { +/** + * Pointer-like wrapper around a std::atomic member: the pointer conversions and operator-> + * read it with load(), and operator= stores a new raw pointer. + */ +template +class atomic_ptr { +public: + atomic_ptr(T *_ptr) : ptr{_ptr} {} + + operator const T*() const { return ptr.load(); } + const T& operator*() const { return *ptr; } + const T* operator->() const { return ptr.load(); } + + operator T*() { return ptr.load(); } + T& operator*() { return *ptr; } + T* operator->() { return ptr.load(); } + atomic_ptr& operator=(T *_ptr) { ptr = _ptr; return *this; } + +private: + std::atomic ptr; +}; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_ATOMIC_PTR_H +/* end file simdjson/internal/atomic_ptr.h */ +/* including simdjson/internal/dom_parser_implementation.h: #include "simdjson/internal/dom_parser_implementation.h" */ +/* begin file simdjson/internal/dom_parser_implementation.h */ +#ifndef SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H +#define SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H + +/* skipped duplicate #include "simdjson/base.h" */ +/* skipped duplicate #include "simdjson/error.h" */ +#include + +namespace simdjson { + +namespace dom { +class document; +} // namespace dom + +/** +* This enum is used with the dom_parser_implementation::stage1 function. +* 1) The regular mode expects a fully formed JSON document. +* 2) The streaming_partial mode expects a possibly truncated +* input within a stream of JSON documents. +* 3) The streaming_final mode allows us to truncate final +* unterminated strings.
It is useful in conjunction with streaming_partial. +*/ +enum class stage1_mode { regular, streaming_partial, streaming_final}; + +/** + * Returns true if mode == streaming_partial or mode == streaming_final + */ +inline bool is_streaming(stage1_mode mode) { + // performance note: it is probably faster to check that mode is different + // from regular than checking that it is either streaming_partial or streaming_final. + return (mode != stage1_mode::regular); + // return (mode == stage1_mode::streaming_partial || mode == stage1_mode::streaming_final); +} + + +namespace internal { + + +/** + * An implementation of simdjson's DOM parser for a particular CPU architecture. + * + * This class is expected to be accessed only by pointer, and never move in memory (though the + * pointer can move). + */ +class dom_parser_implementation { +public: + + /** + * @private For internal implementation use + * + * Run a full JSON parse on a single document (stage1 + stage2). + * + * Guaranteed only to be called when capacity > document length. + * + * Overridden by each implementation. + * + * @param buf The json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. + * @param len The length of the json document. + * @param doc The document to output to. + * @return The error code, or SUCCESS if there was no error. + */ + simdjson_warn_unused virtual error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept = 0; + + /** + * @private For internal implementation use + * + * Stage 1 of the document parser. + * + * Guaranteed only to be called when capacity > document length. + * + * Overridden by each implementation. + * + * @param buf The json document to parse. + * @param len The length of the json document. + * @param streaming The stage 1 mode (see stage1_mode): regular for a single fully formed document, streaming_partial or streaming_final when parsing a stream of documents (e.g., from parser::parse_many). + * @return The error code, or SUCCESS if there was no error.
+ */ + simdjson_warn_unused virtual error_code stage1(const uint8_t *buf, size_t len, stage1_mode streaming) noexcept = 0; + + /** + * @private For internal implementation use + * + * Stage 2 of the document parser. + * + * Called after stage1(). + * + * Overridden by each implementation. + * + * @param doc The document to output to. + * @return The error code, or SUCCESS if there was no error. + */ + simdjson_warn_unused virtual error_code stage2(dom::document &doc) noexcept = 0; + + /** + * @private For internal implementation use + * + * Stage 2 of the document parser for parser::parse_many. + * + * Guaranteed only to be called after stage1(). + * Overridden by each implementation. + * + * @param doc The document to output to. + * @return The error code, SUCCESS if there was no error, or EMPTY if all documents have been parsed. + */ + simdjson_warn_unused virtual error_code stage2_next(dom::document &doc) noexcept = 0; + + /** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as a pointer. In case of error (e.g., the string has bad escaped codes), + * then nullptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + * + * Overridden by each implementation. + * + * @param src pointer to the beginning of a valid UTF-8 JSON string, must end with an unescaped quote. + * @param dst pointer to a destination buffer, it must point to a region in memory of sufficient size. + * @param allow_replacement whether we allow a replacement character when the UTF-8 contains unmatched surrogate pairs. + * @return end of the written region (exclusive) or nullptr in case of error.
+ */ + simdjson_warn_unused virtual uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept = 0; + + /** + * Unescape a possibly invalid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as a pointer. In case of error (e.g., the string has bad escaped codes), + * then nullptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + * + * Overridden by each implementation. + * + * @param src pointer to the beginning of a possibly invalid UTF-8 JSON string, must end with an unescaped quote. + * @param dst pointer to a destination buffer, it must point to a region in memory of sufficient size. + * @return end of the written region (exclusive) or nullptr in case of error. + */ + simdjson_warn_unused virtual uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept = 0; + + /** + * Change the capacity of this parser. + * + * The capacity can never exceed SIMDJSON_MAXSIZE_BYTES (e.g., 4 GB) + * and a CAPACITY error is returned if it is attempted. + * + * Generally used for reallocation. + * + * @param capacity The new capacity. + * @return The error code, or SUCCESS if there was no error. + */ + virtual error_code set_capacity(size_t capacity) noexcept = 0; + + /** + * Change the max depth of this parser. + * + * Generally used for reallocation. + * + * @param max_depth The new max_depth. + * @return The error code, or SUCCESS if there was no error. + */ + virtual error_code set_max_depth(size_t max_depth) noexcept = 0; + + /** + * Deallocate this parser.
+ */ + virtual ~dom_parser_implementation() = default; + + /** Number of structural indices passed from stage 1 to stage 2 */ + uint32_t n_structural_indexes{0}; + /** Structural indices passed from stage 1 to stage 2 */ + std::unique_ptr structural_indexes{}; + /** Next structural index to parse */ + uint32_t next_structural_index{0}; + + /** + * The largest document this parser can support without reallocating. + * + * @return Current capacity, in bytes. + */ + simdjson_inline size_t capacity() const noexcept; + + /** + * The maximum level of nested objects and arrays supported by this parser. + * + * @return Maximum depth, as a number of nesting levels. + */ + simdjson_inline size_t max_depth() const noexcept; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. + * @return The error code, or SUCCESS if there was no error. + */ + simdjson_warn_unused inline error_code allocate(size_t capacity, size_t max_depth) noexcept; + + +protected: + /** + * The maximum document length this parser supports. + * + * Buffers are large enough to handle any document up to this length. + */ + size_t _capacity{0}; + + /** + * The maximum depth (number of nested objects and arrays) supported by this parser. + * + * Initialized to 0 here. NOTE(review): DEFAULT_MAX_DEPTH is presumably applied by whoever calls allocate() - confirm. + */ + size_t _max_depth{0}; + + // Declaring these so that subclasses can use them to implement their constructors.
+ simdjson_inline dom_parser_implementation() noexcept; + simdjson_inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + simdjson_inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + + simdjson_inline dom_parser_implementation(const dom_parser_implementation &) noexcept = delete; + simdjson_inline dom_parser_implementation &operator=(const dom_parser_implementation &other) noexcept = delete; +}; // class dom_parser_implementation + +simdjson_inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +simdjson_inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +simdjson_inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +simdjson_inline size_t dom_parser_implementation::capacity() const noexcept { + return _capacity; +} + +simdjson_inline size_t dom_parser_implementation::max_depth() const noexcept { + return _max_depth; +} + +simdjson_warn_unused +inline error_code dom_parser_implementation::allocate(size_t capacity, size_t max_depth) noexcept { + if (this->max_depth() != max_depth) { + error_code err = set_max_depth(max_depth); + if (err) { return err; } + } + if (_capacity != capacity) { + error_code err = set_capacity(capacity); + if (err) { return err; } + } + return SUCCESS; +} + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/internal/dom_parser_implementation.h */ + +#include + +namespace simdjson { + +/** + * Validate the UTF-8 string. + * + * @param buf the string to validate. + * @param len the length of the string in bytes. + * @return true if the string is valid UTF-8. + */ +simdjson_warn_unused bool validate_utf8(const char * buf, size_t len) noexcept; +/** + * Validate the UTF-8 string. + * + * @param sv the string_view to validate. 
+ * @return true if the string is valid UTF-8. + */ +simdjson_inline simdjson_warn_unused bool validate_utf8(const std::string_view sv) noexcept { + return validate_utf8(sv.data(), sv.size()); +} + +/** + * Validate the UTF-8 string. + * + * @param s the string to validate. + * @return true if the string is valid UTF-8. + */ +simdjson_inline simdjson_warn_unused bool validate_utf8(const std::string& s) noexcept { + return validate_utf8(s.data(), s.size()); +} + +/** + * An implementation of simdjson for a particular CPU architecture. + * + * Also used to maintain the currently active implementation. The active implementation is + * automatically initialized on first use to the most advanced implementation supported by the host. + */ +class implementation { +public: + + /** + * The name of this implementation. + * + * const implementation *impl = simdjson::get_active_implementation(); + * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; + * + * @return the name of the implementation, e.g. "haswell", "westmere", "arm64". + */ + virtual std::string name() const { return std::string(_name); } + + /** + * The description of this implementation. + * + * const implementation *impl = simdjson::get_active_implementation(); + * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; + * + * @return the description of the implementation, e.g. "Intel/AMD AVX2", "Intel/AMD SSE4.2", "ARM NEON". + */ + virtual std::string description() const { return std::string(_description); } + + /** + * Whether every instruction set this implementation is compiled against + * is supported by the current CPU. This function may poll the current CPU/system + * and should therefore not be called too often if performance is a concern. + * + * @return true if the implementation can be safely used on the current system (determined at runtime).
+ */ + bool supported_by_runtime_system() const; + + /** + * @private For internal implementation use + * + * The instruction sets this implementation is compiled against. + * + * @return a mask of all required `internal::instruction_set::` values. + */ + virtual uint32_t required_instruction_sets() const { return _required_instruction_sets; } + + /** + * @private For internal implementation use + * + * Create the DOM parser implementation for this architecture and store it in dst. + * Overridden by each implementation. + * + * @param capacity The largest document that will be passed to the parser. + * @param max_depth The maximum JSON object/array nesting this parser is expected to handle. + * @param dst The place to put the resulting parser implementation. + * @return the error code, or SUCCESS if there was no error. + */ + virtual error_code create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr &dst + ) const noexcept = 0; + + /** + * @private For internal implementation use + * + * Minify the input string assuming that it represents a JSON string, does not parse or validate. + * + * Overridden by each implementation. + * + * @param buf the json document to minify. + * @param len the length of the json document. + * @param dst the buffer to write the minified document to. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. + * @param dst_len the number of bytes written. Output only. + * @return the error code, or SUCCESS if there was no error. + */ + simdjson_warn_unused virtual error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept = 0; + + + /** + * Validate the UTF-8 string. + * + * Overridden by each implementation. + * + * @param buf the string to validate. + * @param len the length of the string in bytes. + * @return true if and only if the string is valid UTF-8.
+ */ + simdjson_warn_unused virtual bool validate_utf8(const char *buf, size_t len) const noexcept = 0; + +protected: + /** @private Construct an implementation with the given name, description and required instruction sets. For subclasses. The name and description are kept as std::string_view, i.e. non-owning: the character data must outlive this object. */ + simdjson_inline implementation( + std::string_view name, + std::string_view description, + uint32_t required_instruction_sets + ) : + _name(name), + _description(description), + _required_instruction_sets(required_instruction_sets) + { + } +protected: + ~implementation() = default; + +private: + /** + * The name of this implementation. + */ + std::string_view _name; + + /** + * The description of this implementation. + */ + std::string_view _description; + + /** + * Instruction sets required for this implementation. + */ + const uint32_t _required_instruction_sets; +}; + +/** @private */ +namespace internal { + +/** + * The list of available implementations compiled into simdjson. + */ +class available_implementation_list { +public: + /** Get the list of available implementations compiled into simdjson */ + simdjson_inline available_implementation_list() {} + /** Number of implementations */ + size_t size() const noexcept; + /** STL const begin() iterator */ + const implementation * const *begin() const noexcept; + /** STL const end() iterator */ + const implementation * const *end() const noexcept; + + /** + * Get the implementation with the given name. + * + * Case sensitive. + * + * const implementation *impl = simdjson::get_available_implementations()["westmere"]; + * if (!impl) { exit(1); } + * if (!impl->supported_by_runtime_system()) { exit(1); } + * simdjson::get_active_implementation() = impl; + * + * @param name the implementation to find, e.g. "westmere", "haswell", "arm64" + * @return the implementation, or nullptr if no implementation has that name.
+ */ + const implementation * operator[](const std::string_view &name) const noexcept { + for (const implementation * impl : *this) { + if (impl->name() == name) { return impl; } + } + return nullptr; + } + + /** + * Detect the most advanced implementation supported by the current host. + * + * This is used to initialize the implementation on startup. + * + * const implementation *impl = simdjson::available_implementation::detect_best_supported(); + * simdjson::get_active_implementation() = impl; + * + * @return the most advanced supported implementation for the current host, or an + * implementation that returns UNSUPPORTED_ARCHITECTURE if there is no supported + * implementation. Will never return nullptr. + */ + const implementation *detect_best_supported() const noexcept; +}; + +} // namespace internal + +/** + * The list of available implementations compiled into simdjson. + */ +extern SIMDJSON_DLLIMPORTEXPORT const internal::available_implementation_list& get_available_implementations(); + +/** + * The active implementation. + * + * Automatically initialized on first use to the most advanced implementation supported by this hardware. + */ +extern SIMDJSON_DLLIMPORTEXPORT internal::atomic_ptr& get_active_implementation(); + +} // namespace simdjson + +#endif // SIMDJSON_IMPLEMENTATION_H +/* end file simdjson/implementation.h */ +/* skipped duplicate #include "simdjson/implementation_detection.h" */ +/* including simdjson/internal/instruction_set.h: #include "simdjson/internal/instruction_set.h" */ +/* begin file simdjson/internal/instruction_set.h */ +/* From +https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h +Highly modified. 
+ +Copyright (c) 2016- Facebook, Inc (Adam Paszke) +Copyright (c) 2014- Facebook, Inc (Soumith Chintala) +Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) +Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) +Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) +Copyright (c) 2011-2013 NYU (Clement Farabet) +Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, +Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute +(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, +Samy Bengio, Johnny Mariethoz) + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories +America and IDIAP Research Institute nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef SIMDJSON_INTERNAL_INSTRUCTION_SET_H +#define SIMDJSON_INTERNAL_INSTRUCTION_SET_H + +namespace simdjson { +namespace internal { + +enum instruction_set { + DEFAULT = 0x0, + NEON = 0x1, + AVX2 = 0x4, + SSE42 = 0x8, + PCLMULQDQ = 0x10, + BMI1 = 0x20, + BMI2 = 0x40, + ALTIVEC = 0x80, + AVX512F = 0x100, + AVX512DQ = 0x200, + AVX512IFMA = 0x400, + AVX512PF = 0x800, + AVX512ER = 0x1000, + AVX512CD = 0x2000, + AVX512BW = 0x4000, + AVX512VL = 0x8000, + AVX512VBMI2 = 0x10000, + LSX = 0x20000, + LASX = 0x40000, +}; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_INSTRUCTION_SET_H +/* end file simdjson/internal/instruction_set.h */ +/* skipped duplicate #include "simdjson/internal/dom_parser_implementation.h" */ +/* skipped duplicate #include "simdjson/internal/jsoncharutils_tables.h" */ +/* skipped duplicate #include "simdjson/internal/numberparsing_tables.h" */ +/* including simdjson/internal/simdprune_tables.h: #include "simdjson/internal/simdprune_tables.h" */ +/* begin file simdjson/internal/simdprune_tables.h */ +#ifndef SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H +#define SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H + +/* skipped duplicate #include "simdjson/base.h" */ + +#include + +namespace simdjson { // table modified and copied from +namespace internal { // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetTable + +extern SIMDJSON_DLLIMPORTEXPORT const unsigned char BitsSetTable256mul2[256]; + +extern 
SIMDJSON_DLLIMPORTEXPORT const uint8_t pshufb_combine_table[272]; + +// 256 * 8 bytes = 2kB, easily fits in cache. +extern SIMDJSON_DLLIMPORTEXPORT const uint64_t thintable_epi8[256]; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H +/* end file simdjson/internal/simdprune_tables.h */ + +#endif // SIMDJSON_GENERIC_DEPENDENCIES_H +/* end file simdjson/generic/dependencies.h */ +/* including generic/dependencies.h: #include */ +/* begin file generic/dependencies.h */ +#ifdef SIMDJSON_CONDITIONAL_INCLUDE +#error generic/dependencies.h must be included before defining SIMDJSON_CONDITIONAL_INCLUDE! +#endif + +#ifndef SIMDJSON_SRC_GENERIC_DEPENDENCIES_H +#define SIMDJSON_SRC_GENERIC_DEPENDENCIES_H + +/* skipped duplicate #include */ + +#endif // SIMDJSON_SRC_GENERIC_DEPENDENCIES_H +/* end file generic/dependencies.h */ +/* including generic/stage1/dependencies.h: #include */ +/* begin file generic/stage1/dependencies.h */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_DEPENDENCIES_H +#define SIMDJSON_SRC_GENERIC_STAGE1_DEPENDENCIES_H + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_DEPENDENCIES_H +/* end file generic/stage1/dependencies.h */ +/* including generic/stage2/dependencies.h: #include */ +/* begin file generic/stage2/dependencies.h */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_DEPENDENCIES_H +#define SIMDJSON_SRC_GENERIC_STAGE2_DEPENDENCIES_H + +/* including simdjson/dom/document.h: #include */ +/* begin file simdjson/dom/document.h */ +#ifndef SIMDJSON_DOM_DOCUMENT_H +#define SIMDJSON_DOM_DOCUMENT_H + +/* including simdjson/dom/base.h: #include "simdjson/dom/base.h" */ +/* begin file simdjson/dom/base.h */ +#ifndef SIMDJSON_DOM_BASE_H +#define SIMDJSON_DOM_BASE_H + +/* skipped duplicate #include "simdjson/base.h" */ + +namespace simdjson { + +/** + * @brief A DOM API on top of the simdjson parser. 
+ */ +namespace dom { + +/** The default batch size for parser.parse_many() and parser.load_many() */ +static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; +/** + * Some adversary might try to set the batch size to 0 or 1, which might cause problems. + * We set a minimum of 32B since anything else is highly likely to be an error. In practice, + * most users will want a much larger batch size. + * + * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON + * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. + */ +static constexpr size_t MINIMAL_BATCH_SIZE = 32; + +/** + * It is wasteful to allocate memory for tiny documents (e.g., 4 bytes). + */ +static constexpr size_t MINIMAL_DOCUMENT_CAPACITY = 32; + +class array; +class document; +class document_stream; +class element; +class key_value_pair; +class object; +class parser; + +#ifdef SIMDJSON_THREADS_ENABLED +struct stage1_worker; +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace dom + +namespace internal { + +template +class string_builder; +class tape_ref; + +} // namespace internal + +} // namespace simdjson + +#endif // SIMDJSON_DOM_BASE_H +/* end file simdjson/dom/base.h */ + +#include + +namespace simdjson { +namespace dom { + +/** + * A parsed JSON document. + * + * This class cannot be copied, only moved, to avoid unintended allocations. + */ +class document { +public: + /** + * Create a document container with zero capacity. + * + * The parser will allocate capacity as needed. + */ + document() noexcept = default; + ~document() noexcept = default; + + /** + * Take another document's buffers. + * + * @param other The document to take. Its capacity is zeroed and it is invalidated. + */ + document(document &&other) noexcept = default; + /** @private */ + document(const document &) = delete; // Disallow copying + /** + * Take another document's buffers. + * + * @param other The document to take. 
Its capacity is zeroed. + */ + document &operator=(document &&other) noexcept = default; + /** @private */ + document &operator=(const document &) = delete; // Disallow copying + + /** + * Get the root element of this document. + */ + element root() const noexcept; + + /** + * @private Dump the raw tape for debugging. + * + * @param os the stream to output to. + * @return false if the tape is likely wrong (e.g., you did not parse a valid JSON). + */ + bool dump_raw_tape(std::ostream &os) const noexcept; + + /** @private Structural values. */ + std::unique_ptr tape{}; + + /** @private String values. + * + * Should be at least byte_capacity. + */ + std::unique_ptr string_buf{}; + /** @private Allocate memory to support + * input JSON documents of up to len bytes. + * + * When calling this function, you lose + * all the data. + * + * The memory allocation is strict: you + * can use this function to increase + * or lower the amount of allocated memory. + * Passing zero clears the memory. + */ + error_code allocate(size_t len) noexcept; + /** @private Capacity in bytes, in terms + * of how many bytes of input JSON we can + * support. + */ + size_t capacity() const noexcept; + + +private: + size_t allocated_capacity{0}; + friend class parser; +}; // class document + +} // namespace dom +} // namespace simdjson + +#endif // SIMDJSON_DOM_DOCUMENT_H +/* end file simdjson/dom/document.h */ +/* including simdjson/internal/tape_type.h: #include */ +/* begin file simdjson/internal/tape_type.h */ +#ifndef SIMDJSON_INTERNAL_TAPE_TYPE_H +#define SIMDJSON_INTERNAL_TAPE_TYPE_H + +namespace simdjson { +namespace internal { + +/** + * The possible types in the tape.
+ */ +enum class tape_type { + ROOT = 'r', + START_ARRAY = '[', + START_OBJECT = '{', + END_ARRAY = ']', + END_OBJECT = '}', + STRING = '"', + INT64 = 'l', + UINT64 = 'u', + DOUBLE = 'd', + TRUE_VALUE = 't', + FALSE_VALUE = 'f', + NULL_VALUE = 'n' +}; // enum class tape_type + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_TAPE_TYPE_H +/* end file simdjson/internal/tape_type.h */ + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_DEPENDENCIES_H +/* end file generic/stage2/dependencies.h */ + +/* including implementation.cpp: #include */ +/* begin file implementation.cpp */ +#ifndef SIMDJSON_SRC_IMPLEMENTATION_CPP +#define SIMDJSON_SRC_IMPLEMENTATION_CPP + +/* skipped duplicate #include */ +/* skipped duplicate #include */ +/* skipped duplicate #include */ +/* including internal/isadetection.h: #include */ +/* begin file internal/isadetection.h */ +/* From +https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h +Highly modified. + +Copyright (c) 2016- Facebook, Inc (Adam Paszke) +Copyright (c) 2014- Facebook, Inc (Soumith Chintala) +Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) +Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) +Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) +Copyright (c) 2011-2013 NYU (Clement Farabet) +Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, +Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute +(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, +Samy Bengio, Johnny Mariethoz) + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories +America and IDIAP Research Institute nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifndef SIMDJSON_INTERNAL_ISADETECTION_H +#define SIMDJSON_INTERNAL_ISADETECTION_H + +/* skipped duplicate #include "simdjson/internal/instruction_set.h" */ + +#include +#include +#if defined(_MSC_VER) +#include +#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) +#include +#endif + +namespace simdjson { +namespace internal { + +#if defined(__PPC64__) + +static inline uint32_t detect_supported_architectures() { + return instruction_set::ALTIVEC; +} + +#elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) + +static inline uint32_t detect_supported_architectures() { + return instruction_set::NEON; +} + +#elif defined(__x86_64__) || defined(_M_AMD64) // x64 + + +namespace { +// Can be found on Intel ISA Reference for CPUID +constexpr uint32_t cpuid_avx2_bit = 1 << 5; ///< @private Bit 5 of EBX for EAX=0x7 +constexpr uint32_t cpuid_bmi1_bit = 1 << 3; ///< @private bit 3 of EBX for EAX=0x7 +constexpr uint32_t cpuid_bmi2_bit = 1 << 8; ///< @private bit 8 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512f_bit = 1 << 16; ///< @private bit 16 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512dq_bit = 1 << 17; ///< @private bit 17 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512ifma_bit = 1 << 21; ///< @private bit 21 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512pf_bit = 1 << 26; ///< @private bit 26 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512er_bit = 1 << 27; ///< @private bit 27 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512cd_bit = 1 << 28; ///< @private bit 28 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512bw_bit = 1 << 30; ///< @private bit 30 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512vl_bit = 1U << 31; ///< @private bit 31 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512vbmi2_bit = 1 << 6; ///< @private bit 6 of ECX for EAX=0x7 +constexpr uint64_t cpuid_avx256_saved = uint64_t(1) << 2; ///< @private bit 2 = AVX +constexpr uint64_t cpuid_avx512_saved = uint64_t(7) << 5; ///< @private bits 
5,6,7 = opmask, ZMM_hi256, hi16_ZMM +constexpr uint32_t cpuid_sse42_bit = 1 << 20; ///< @private bit 20 of ECX for EAX=0x1 +constexpr uint32_t cpuid_osxsave = (uint32_t(1) << 26) | (uint32_t(1) << 27); ///< @private bits 26+27 of ECX for EAX=0x1 +constexpr uint32_t cpuid_pclmulqdq_bit = 1 << 1; ///< @private bit 1 of ECX for EAX=0x1 +} + + + +static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, + uint32_t *edx) { +#if defined(_MSC_VER) + int cpu_info[4]; + __cpuidex(cpu_info, *eax, *ecx); + *eax = cpu_info[0]; + *ebx = cpu_info[1]; + *ecx = cpu_info[2]; + *edx = cpu_info[3]; +#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) + uint32_t level = *eax; /* NOTE(review): __get_cpuid() has no sub-leaf parameter, so the *ecx input is not forwarded on this path - confirm this is acceptable for leaf 0x7 (__get_cpuid_count exists for that). */ + __get_cpuid(level, eax, ebx, ecx, edx); +#else + uint32_t a = *eax, b, c = *ecx, d; + asm volatile("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d)); + *eax = a; + *ebx = b; + *ecx = c; + *edx = d; +#endif +} + + +static inline uint64_t xgetbv() { +#if defined(_MSC_VER) + return _xgetbv(0); +#else + uint32_t xcr0_lo, xcr0_hi; + asm volatile("xgetbv\n\t" : "=a" (xcr0_lo), "=d" (xcr0_hi) : "c" (0)); + return xcr0_lo | (uint64_t(xcr0_hi) << 32); +#endif +} + +static inline uint32_t detect_supported_architectures() { + uint32_t eax, ebx, ecx, edx; + uint32_t host_isa = 0x0; + + // ECX for EAX=0x1 + eax = 0x1; + ecx = 0x0; + cpuid(&eax, &ebx, &ecx, &edx); + + if (ecx & cpuid_sse42_bit) { + host_isa |= instruction_set::SSE42; + } else { + return host_isa; // everything after is redundant + } + + if (ecx & cpuid_pclmulqdq_bit) { + host_isa |= instruction_set::PCLMULQDQ; + } + + + if ((ecx & cpuid_osxsave) != cpuid_osxsave) { + return host_isa; + } + + // xgetbv for checking if the OS saves registers + uint64_t xcr0 = xgetbv(); + + if ((xcr0 & cpuid_avx256_saved) == 0) { + return host_isa; + } + + // EBX and ECX for EAX=0x7 + eax = 0x7; + ecx = 0x0; + cpuid(&eax, &ebx, &ecx, &edx); + if (ebx & cpuid_avx2_bit) { + host_isa |= instruction_set::AVX2; + } + if (ebx & cpuid_bmi1_bit) { + host_isa |=
instruction_set::BMI1; + } + + if (ebx & cpuid_bmi2_bit) { + host_isa |= instruction_set::BMI2; + } + + if (!((xcr0 & cpuid_avx512_saved) == cpuid_avx512_saved)) { + return host_isa; + } + + if (ebx & cpuid_avx512f_bit) { + host_isa |= instruction_set::AVX512F; + } + + if (ebx & cpuid_avx512dq_bit) { + host_isa |= instruction_set::AVX512DQ; + } + + if (ebx & cpuid_avx512ifma_bit) { + host_isa |= instruction_set::AVX512IFMA; + } + + if (ebx & cpuid_avx512pf_bit) { + host_isa |= instruction_set::AVX512PF; + } + + if (ebx & cpuid_avx512er_bit) { + host_isa |= instruction_set::AVX512ER; + } + + if (ebx & cpuid_avx512cd_bit) { + host_isa |= instruction_set::AVX512CD; + } + + if (ebx & cpuid_avx512bw_bit) { + host_isa |= instruction_set::AVX512BW; + } + + if (ebx & cpuid_avx512vl_bit) { + host_isa |= instruction_set::AVX512VL; + } + + if (ecx & cpuid_avx512vbmi2_bit) { + host_isa |= instruction_set::AVX512VBMI2; + } + + return host_isa; +} + +#elif defined(__loongarch_sx) && !defined(__loongarch_asx) + +static inline uint32_t detect_supported_architectures() { + return instruction_set::LSX; +} + +#elif defined(__loongarch_asx) + +static inline uint32_t detect_supported_architectures() { + return instruction_set::LASX; +} + +#else // fallback + + +static inline uint32_t detect_supported_architectures() { + return instruction_set::DEFAULT; +} + + +#endif // end SIMD extension detection code + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_ISADETECTION_H +/* end file internal/isadetection.h */ + +#include +#include + +namespace simdjson { + +bool implementation::supported_by_runtime_system() const { + uint32_t required_instruction_sets = this->required_instruction_sets(); + uint32_t supported_instruction_sets = internal::detect_supported_architectures(); + return ((supported_instruction_sets & required_instruction_sets) == required_instruction_sets); +} + +} // namespace simdjson + +/* defining SIMDJSON_CONDITIONAL_INCLUDE */ +#define 
SIMDJSON_CONDITIONAL_INCLUDE + +#if SIMDJSON_IMPLEMENTATION_ARM64 +/* including simdjson/arm64/implementation.h: #include */ +/* begin file simdjson/arm64/implementation.h */ +#ifndef SIMDJSON_ARM64_IMPLEMENTATION_H +#define SIMDJSON_ARM64_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("arm64", "ARM NEON", internal::instruction_set::NEON) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_IMPLEMENTATION_H +/* end file simdjson/arm64/implementation.h */ +namespace simdjson { +namespace internal { +static const arm64::implementation* get_arm64_singleton() { + static const arm64::implementation arm64_singleton{}; + return &arm64_singleton; +} +} // namespace internal +} // namespace simdjson +#endif // SIMDJSON_IMPLEMENTATION_ARM64 + +#if SIMDJSON_IMPLEMENTATION_FALLBACK +/* including simdjson/fallback/implementation.h: #include */ +/* begin file simdjson/fallback/implementation.h */ +#ifndef SIMDJSON_FALLBACK_IMPLEMENTATION_H +#define SIMDJSON_FALLBACK_IMPLEMENTATION_H + +/* amalgamation skipped 
(editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation( + "fallback", + "Generic fallback implementation", + 0 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_IMPLEMENTATION_H +/* end file simdjson/fallback/implementation.h */ +namespace simdjson { +namespace internal { +static const fallback::implementation* get_fallback_singleton() { + static const fallback::implementation fallback_singleton{}; + return &fallback_singleton; +} +} // namespace internal +} // namespace simdjson +#endif // SIMDJSON_IMPLEMENTATION_FALLBACK + + +#if SIMDJSON_IMPLEMENTATION_HASWELL +/* including simdjson/haswell/implementation.h: #include */ +/* begin file simdjson/haswell/implementation.h */ +#ifndef SIMDJSON_HASWELL_IMPLEMENTATION_H +#define SIMDJSON_HASWELL_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped 
(editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL +namespace simdjson { +namespace haswell { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation( + "haswell", + "Intel/AMD AVX2", + internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_IMPLEMENTATION_H +/* end file simdjson/haswell/implementation.h */ +namespace simdjson { +namespace internal { +static const haswell::implementation* get_haswell_singleton() { + static const haswell::implementation haswell_singleton{}; + return &haswell_singleton; +} +} // namespace internal +} // namespace simdjson +#endif + +#if SIMDJSON_IMPLEMENTATION_ICELAKE +/* including simdjson/icelake/implementation.h: #include */ +/* begin file simdjson/icelake/implementation.h */ +#ifndef SIMDJSON_ICELAKE_IMPLEMENTATION_H +#define SIMDJSON_ICELAKE_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The 
constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE +namespace simdjson { +namespace icelake { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation( + "icelake", + "Intel/AMD AVX512", + internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 | internal::instruction_set::AVX512F | internal::instruction_set::AVX512DQ | internal::instruction_set::AVX512CD | internal::instruction_set::AVX512BW | internal::instruction_set::AVX512VL | internal::instruction_set::AVX512VBMI2 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_IMPLEMENTATION_H +/* end file simdjson/icelake/implementation.h */ +namespace simdjson { +namespace internal { +static const icelake::implementation* get_icelake_singleton() { + static const icelake::implementation icelake_singleton{}; + return &icelake_singleton; +} +} // namespace internal +} // namespace simdjson +#endif + +#if SIMDJSON_IMPLEMENTATION_PPC64 +/* including simdjson/ppc64/implementation.h: #include */ +/* begin file simdjson/ppc64/implementation.h */ +#ifndef SIMDJSON_PPC64_IMPLEMENTATION_H +#define SIMDJSON_PPC64_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped 
(editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +/** + * Implementation for ALTIVEC (PPC64). + */ +namespace ppc64 { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() + : simdjson::implementation("ppc64", "PPC64 ALTIVEC", + internal::instruction_set::ALTIVEC) {} + + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, size_t max_length, + std::unique_ptr &dst) + const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, + uint8_t *dst, + size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, + size_t len) const noexcept final; +}; + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_IMPLEMENTATION_H +/* end file simdjson/ppc64/implementation.h */ +namespace simdjson { +namespace internal { +static const ppc64::implementation* get_ppc64_singleton() { + static const ppc64::implementation ppc64_singleton{}; + return &ppc64_singleton; +} +} // namespace internal +} // namespace simdjson +#endif // SIMDJSON_IMPLEMENTATION_PPC64 + +#if SIMDJSON_IMPLEMENTATION_WESTMERE +/* including simdjson/westmere/implementation.h: #include */ +/* begin file simdjson/westmere/implementation.h */ +#ifndef SIMDJSON_WESTMERE_IMPLEMENTATION_H +#define SIMDJSON_WESTMERE_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take 
care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +namespace westmere { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("westmere", "Intel/AMD SSE4.2", internal::instruction_set::SSE42 | internal::instruction_set::PCLMULQDQ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_IMPLEMENTATION_H +/* end file simdjson/westmere/implementation.h */ +namespace simdjson { +namespace internal { +static const simdjson::westmere::implementation* get_westmere_singleton() { + static const simdjson::westmere::implementation westmere_singleton{}; + return &westmere_singleton; +} +} // namespace internal +} // namespace simdjson +#endif // SIMDJSON_IMPLEMENTATION_WESTMERE + +#if SIMDJSON_IMPLEMENTATION_LSX +/* including simdjson/lsx/implementation.h: #include */ +/* begin file simdjson/lsx/implementation.h */ +#ifndef SIMDJSON_LSX_IMPLEMENTATION_H +#define SIMDJSON_LSX_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() 
: simdjson::implementation("lsx", "LoongArch SX", internal::instruction_set::LSX) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_IMPLEMENTATION_H +/* end file simdjson/lsx/implementation.h */ +namespace simdjson { +namespace internal { +static const simdjson::lsx::implementation* get_lsx_singleton() { + static const simdjson::lsx::implementation lsx_singleton{}; + return &lsx_singleton; +} +} // namespace internal +} // namespace simdjson +#endif // SIMDJSON_IMPLEMENTATION_LSX + +#if SIMDJSON_IMPLEMENTATION_LASX +/* including simdjson/lasx/implementation.h: #include */ +/* begin file simdjson/lasx/implementation.h */ +#ifndef SIMDJSON_LASX_IMPLEMENTATION_H +#define SIMDJSON_LASX_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("lasx", "LoongArch ASX", internal::instruction_set::LASX) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, 
size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_IMPLEMENTATION_H +/* end file simdjson/lasx/implementation.h */ +namespace simdjson { +namespace internal { +static const simdjson::lasx::implementation* get_lasx_singleton() { + static const simdjson::lasx::implementation lasx_singleton{}; + return &lasx_singleton; +} +} // namespace internal +} // namespace simdjson +#endif // SIMDJSON_IMPLEMENTATION_LASX + +/* undefining SIMDJSON_CONDITIONAL_INCLUDE */ +#undef SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace internal { + +// When there is a single implementation, we should not pay a price +// for dispatching to the best implementation. We should just use the +// one we have. This is a compile-time check. +#define SIMDJSON_SINGLE_IMPLEMENTATION (SIMDJSON_IMPLEMENTATION_ICELAKE \ + + SIMDJSON_IMPLEMENTATION_HASWELL + SIMDJSON_IMPLEMENTATION_WESTMERE \ + + SIMDJSON_IMPLEMENTATION_ARM64 + SIMDJSON_IMPLEMENTATION_PPC64 \ + + SIMDJSON_IMPLEMENTATION_LSX + SIMDJSON_IMPLEMENTATION_LASX \ + + SIMDJSON_IMPLEMENTATION_FALLBACK == 1) + +#if SIMDJSON_SINGLE_IMPLEMENTATION + static const implementation* get_single_implementation() { + return +#if SIMDJSON_IMPLEMENTATION_ICELAKE + get_icelake_singleton(); +#endif +#if SIMDJSON_IMPLEMENTATION_HASWELL + get_haswell_singleton(); +#endif +#if SIMDJSON_IMPLEMENTATION_WESTMERE + get_westmere_singleton(); +#endif +#if SIMDJSON_IMPLEMENTATION_ARM64 + get_arm64_singleton(); +#endif +#if SIMDJSON_IMPLEMENTATION_PPC64 + get_ppc64_singleton(); +#endif +#if SIMDJSON_IMPLEMENTATION_LSX + get_lsx_singleton(); +#endif +#if SIMDJSON_IMPLEMENTATION_LASX + get_lasx_singleton(); +#endif +#if SIMDJSON_IMPLEMENTATION_FALLBACK + get_fallback_singleton(); +#endif +} +#endif + +// Static array of known implementations. 
We're hoping these get baked into the executable +// without requiring a static initializer. + +/** + * @private Detects best supported implementation on first use, and sets it + */ +class detect_best_supported_implementation_on_first_use final : public implementation { +public: + std::string name() const noexcept final { return set_best()->name(); } + std::string description() const noexcept final { return set_best()->description(); } + uint32_t required_instruction_sets() const noexcept final { return set_best()->required_instruction_sets(); } + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final { + return set_best()->create_dom_parser_implementation(capacity, max_length, dst); + } + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final { + return set_best()->minify(buf, len, dst, dst_len); + } + simdjson_warn_unused bool validate_utf8(const char * buf, size_t len) const noexcept final override { + return set_best()->validate_utf8(buf, len); + } + simdjson_inline detect_best_supported_implementation_on_first_use() noexcept : implementation("best_supported_detector", "Detects the best supported implementation and sets it", 0) {} +private: + const implementation *set_best() const noexcept; +}; + +static_assert(std::is_trivially_destructible::value, "detect_best_supported_implementation_on_first_use should be trivially destructible"); + +static const std::initializer_list& get_available_implementation_pointers() { + static const std::initializer_list available_implementation_pointers { +#if SIMDJSON_IMPLEMENTATION_ICELAKE + get_icelake_singleton(), +#endif +#if SIMDJSON_IMPLEMENTATION_HASWELL + get_haswell_singleton(), +#endif +#if SIMDJSON_IMPLEMENTATION_WESTMERE + get_westmere_singleton(), +#endif +#if SIMDJSON_IMPLEMENTATION_ARM64 + get_arm64_singleton(), +#endif +#if 
SIMDJSON_IMPLEMENTATION_PPC64 + get_ppc64_singleton(), +#endif +#if SIMDJSON_IMPLEMENTATION_LSX + get_lsx_singleton(), +#endif +#if SIMDJSON_IMPLEMENTATION_LASX + get_lasx_singleton(), +#endif +#if SIMDJSON_IMPLEMENTATION_FALLBACK + get_fallback_singleton(), +#endif + }; // available_implementation_pointers + return available_implementation_pointers; +} + +// So we can return UNSUPPORTED_ARCHITECTURE from the parser when there is no support +class unsupported_implementation final : public implementation { +public: + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t, + size_t, + std::unique_ptr& + ) const noexcept final { + return UNSUPPORTED_ARCHITECTURE; + } + simdjson_warn_unused error_code minify(const uint8_t *, size_t, uint8_t *, size_t &) const noexcept final override { + return UNSUPPORTED_ARCHITECTURE; + } + simdjson_warn_unused bool validate_utf8(const char *, size_t) const noexcept final override { + return false; // Just refuse to validate. Given that we have a fallback implementation + // it seems unlikely that unsupported_implementation will ever be used. If it is used, + // then it will flag all strings as invalid. The alternative is to return an error_code + // from which the user has to figure out whether the string is valid UTF-8... which seems + // like a lot of work just to handle the very unlikely case that we have an unsupported + // implementation. And, when it does happen (that we have an unsupported implementation), + // what are the chances that the programmer has a fallback? Given that *we* provide the + // fallback, it implies that the programmer would need a fallback for our fallback. 
+ } + unsupported_implementation() : implementation("unsupported", "Unsupported CPU (no detected SIMD instructions)", 0) {} +}; + +static_assert(std::is_trivially_destructible::value, "unsupported_singleton should be trivially destructible"); + +const unsupported_implementation* get_unsupported_singleton() { + static const unsupported_implementation unsupported_singleton{}; + return &unsupported_singleton; +} + +size_t available_implementation_list::size() const noexcept { + return internal::get_available_implementation_pointers().size(); +} +const implementation * const *available_implementation_list::begin() const noexcept { + return internal::get_available_implementation_pointers().begin(); +} +const implementation * const *available_implementation_list::end() const noexcept { + return internal::get_available_implementation_pointers().end(); +} +const implementation *available_implementation_list::detect_best_supported() const noexcept { + // They are prelisted in priority order, so we just go down the list + uint32_t supported_instruction_sets = internal::detect_supported_architectures(); + for (const implementation *impl : internal::get_available_implementation_pointers()) { + uint32_t required_instruction_sets = impl->required_instruction_sets(); + if ((supported_instruction_sets & required_instruction_sets) == required_instruction_sets) { return impl; } + } + return get_unsupported_singleton(); // this should never happen? 
+} + +const implementation *detect_best_supported_implementation_on_first_use::set_best() const noexcept { + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + char *force_implementation_name = getenv("SIMDJSON_FORCE_IMPLEMENTATION"); + SIMDJSON_POP_DISABLE_WARNINGS + + if (force_implementation_name) { + auto force_implementation = get_available_implementations()[force_implementation_name]; + if (force_implementation) { + return get_active_implementation() = force_implementation; + } else { + // Note: abort() and stderr usage within the library is forbidden. + return get_active_implementation() = get_unsupported_singleton(); + } + } + return get_active_implementation() = get_available_implementations().detect_best_supported(); +} + +} // namespace internal + +SIMDJSON_DLLIMPORTEXPORT const internal::available_implementation_list& get_available_implementations() { + static const internal::available_implementation_list available_implementations{}; + return available_implementations; +} + +SIMDJSON_DLLIMPORTEXPORT internal::atomic_ptr& get_active_implementation() { +#if SIMDJSON_SINGLE_IMPLEMENTATION + // We immediately select the only implementation we have, skipping the + // detect_best_supported_implementation_on_first_use_singleton. 
+ static internal::atomic_ptr active_implementation{internal::get_single_implementation()}; + return active_implementation; +#else + static const internal::detect_best_supported_implementation_on_first_use detect_best_supported_implementation_on_first_use_singleton; + static internal::atomic_ptr active_implementation{&detect_best_supported_implementation_on_first_use_singleton}; + return active_implementation; +#endif +} + +simdjson_warn_unused error_code minify(const char *buf, size_t len, char *dst, size_t &dst_len) noexcept { + return get_active_implementation()->minify(reinterpret_cast(buf), len, reinterpret_cast(dst), dst_len); +} +simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept { + return get_active_implementation()->validate_utf8(buf, len); +} +const implementation * builtin_implementation() { + static const implementation * builtin_impl = get_available_implementations()[SIMDJSON_STRINGIFY(SIMDJSON_BUILTIN_IMPLEMENTATION)]; + assert(builtin_impl); + return builtin_impl; +} + +} // namespace simdjson + +#endif // SIMDJSON_SRC_IMPLEMENTATION_CPP +/* end file implementation.cpp */ + +/* defining SIMDJSON_CONDITIONAL_INCLUDE */ +#define SIMDJSON_CONDITIONAL_INCLUDE + +#if SIMDJSON_IMPLEMENTATION_ARM64 +/* including arm64.cpp: #include */ +/* begin file arm64.cpp */ +#ifndef SIMDJSON_SRC_ARM64_CPP +#define SIMDJSON_SRC_ARM64_CPP + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* including simdjson/arm64.h: #include */ +/* begin file simdjson/arm64.h */ +#ifndef SIMDJSON_ARM64_H +#define SIMDJSON_ARM64_H + +/* including simdjson/arm64/begin.h: #include "simdjson/arm64/begin.h" */ +/* begin file simdjson/arm64/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "arm64" */ +#define SIMDJSON_IMPLEMENTATION arm64 +/* including simdjson/arm64/base.h: #include 
"simdjson/arm64/base.h" */ +/* begin file simdjson/arm64/base.h */ +#ifndef SIMDJSON_ARM64_BASE_H +#define SIMDJSON_ARM64_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Implementation for NEON (ARMv8). + */ +namespace arm64 { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_BASE_H +/* end file simdjson/arm64/base.h */ +/* including simdjson/arm64/intrinsics.h: #include "simdjson/arm64/intrinsics.h" */ +/* begin file simdjson/arm64/intrinsics.h */ +#ifndef SIMDJSON_ARM64_INTRINSICS_H +#define SIMDJSON_ARM64_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. 
+#include + +static_assert(sizeof(uint8x16_t) <= simdjson::SIMDJSON_PADDING, "insufficient padding for arm64"); + +#endif // SIMDJSON_ARM64_INTRINSICS_H +/* end file simdjson/arm64/intrinsics.h */ +/* including simdjson/arm64/bitmanipulation.h: #include "simdjson/arm64/bitmanipulation.h" */ +/* begin file simdjson/arm64/bitmanipulation.h */ +#ifndef SIMDJSON_ARM64_BITMANIPULATION_H +#define SIMDJSON_ARM64_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). 
+ _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int count_ones(uint64_t input_num) { + return vaddv_u8(vcnt_u8(vcreate_u8(input_num))); +} + + +#if defined(__GNUC__) // catches clang and gcc +/** + * ARM has a fast 64-bit "bit reversal function" that is handy. However, + * it is not generally available as an intrinsic function under Visual + * Studio (though this might be changing). Even under clang/gcc, we + * apparently need to invoke inline assembly. + */ +/* + * We use SIMDJSON_PREFER_REVERSE_BITS as a hint that algorithms that + * work well with bit reversal may use it. + */ +#define SIMDJSON_PREFER_REVERSE_BITS 1 + +/* reverse the bits */ +simdjson_inline uint64_t reverse_bits(uint64_t input_num) { + uint64_t rev_bits; + __asm("rbit %0, %1" : "=r"(rev_bits) : "r"(input_num)); + return rev_bits; +} + +/** + * Flips bit at index 63 - lz. Thus if you have 'leading_zeroes' leading zeroes, + * then this will set to zero the leading bit. It is possible for leading_zeroes to be + * greating or equal to 63 in which case we trigger undefined behavior, but the output + * of such undefined behavior is never used. 
+ **/ +SIMDJSON_NO_SANITIZE_UNDEFINED +simdjson_inline uint64_t zero_leading_bit(uint64_t rev_bits, int leading_zeroes) { + return rev_bits ^ (uint64_t(0x8000000000000000) >> leading_zeroes); +} + +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + *result = value1 + value2; + return *result < value1; +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_BITMANIPULATION_H +/* end file simdjson/arm64/bitmanipulation.h */ +/* including simdjson/arm64/bitmask.h: #include "simdjson/arm64/bitmask.h" */ +/* begin file simdjson/arm64/bitmask.h */ +#ifndef SIMDJSON_ARM64_BITMASK_H +#define SIMDJSON_ARM64_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + ///////////// + // We could do this with PMULL, but it is apparently slow. + // + //#ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension + //return vmull_p64(-1ULL, bitmask); + //#else + // Analysis by @sebpop: + // When diffing the assembly for src/stage1_find_marks.cpp I see that the eors are all spread out + // in between other vector code, so effectively the extra cycles of the sequence do not matter + // because the GPR units are idle otherwise and the critical path is on the FP side. 
+ // Also the PMULL requires two extra fmovs: GPR->FP (3 cycles in N1, 5 cycles in A72 ) + // and FP->GPR (2 cycles on N1 and 5 cycles on A72.) + /////////// + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif +/* end file simdjson/arm64/bitmask.h */ +/* including simdjson/arm64/numberparsing_defs.h: #include "simdjson/arm64/numberparsing_defs.h" */ +/* begin file simdjson/arm64/numberparsing_defs.h */ +#ifndef SIMDJSON_ARM64_NUMBERPARSING_DEFS_H +#define SIMDJSON_ARM64_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +#if SIMDJSON_REGULAR_VISUAL_STUDIO && SIMDJSON_IS_ARM64 +// __umulh requires intrin.h +#include +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO && SIMDJSON_IS_ARM64 + +namespace simdjson { +namespace arm64 { +namespace numberparsing { + +// we don't have SSE, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO 
|| SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace arm64 +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_ARM64_NUMBERPARSING_DEFS_H +/* end file simdjson/arm64/numberparsing_defs.h */ +/* including simdjson/arm64/simd.h: #include "simdjson/arm64/simd.h" */ +/* begin file simdjson/arm64/simd.h */ +#ifndef SIMDJSON_ARM64_SIMD_H +#define SIMDJSON_ARM64_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace simd { + +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +namespace { +// Start of private section with Visual Studio workaround + + +#ifndef simdjson_make_uint8x16_t +#define simdjson_make_uint8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, \ + x13, x14, x15, x16) \ + ([=]() { \ + uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ + x9, x10, x11, x12, x13, x14, x15, x16}; \ + return vld1q_u8(array); \ + }()) +#endif +#ifndef simdjson_make_int8x16_t +#define simdjson_make_int8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, \ + x13, x14, x15, x16) \ + ([=]() 
{ \ + int8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ + x9, x10, x11, x12, x13, x14, x15, x16}; \ + return vld1q_s8(array); \ + }()) +#endif + +#ifndef simdjson_make_uint8x8_t +#define simdjson_make_uint8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + uint8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1_u8(array); \ + }()) +#endif +#ifndef simdjson_make_int8x8_t +#define simdjson_make_int8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + int8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1_s8(array); \ + }()) +#endif +#ifndef simdjson_make_uint16x8_t +#define simdjson_make_uint16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + uint16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1q_u16(array); \ + }()) +#endif +#ifndef simdjson_make_int16x8_t +#define simdjson_make_int16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + int16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1q_s16(array); \ + }()) +#endif + +// End of private section with Visual Studio workaround +} // namespace +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO + + + template + struct simd8; + + // + // Base class of simd8 and simd8, both of which use uint8x16_t internally. 
+ // + template> + struct base_u8 { + uint8x16_t value; + static const int SIZE = sizeof(value); + + // Conversion from/to SIMD register + simdjson_inline base_u8(const uint8x16_t _value) : value(_value) {} + simdjson_inline operator const uint8x16_t&() const { return this->value; } + simdjson_inline operator uint8x16_t&() { return this->value; } + + // Bit operations + simdjson_inline simd8 operator|(const simd8 other) const { return vorrq_u8(*this, other); } + simdjson_inline simd8 operator&(const simd8 other) const { return vandq_u8(*this, other); } + simdjson_inline simd8 operator^(const simd8 other) const { return veorq_u8(*this, other); } + simdjson_inline simd8 bit_andnot(const simd8 other) const { return vbicq_u8(*this, other); } + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + simdjson_inline simd8& operator|=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline simd8& operator&=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline simd8& operator^=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast ^ other; return *this_cast; } + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return vceqq_u8(lhs, rhs); } + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return vextq_u8(prev_chunk, *this, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base_u8 { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + static simdjson_inline simd8 splat(bool _value) { return vmovq_n_u8(uint8_t(-(!!_value))); } + + simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} + // False constructor + simdjson_inline simd8() : simd8(vdupq_n_u8(0)) {} + // Splat constructor + simdjson_inline simd8(bool _value) : simd8(splat(_value)) {} + + // 
We return uint32_t instead of uint16_t because that seems to be more efficient for most + // purposes (cutting it down to uint16_t costs performance in some compilers). + simdjson_inline uint32_t to_bitmask() const { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + const uint8x16_t bit_mask = simdjson_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); +#else + const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; +#endif + auto minput = *this & bit_mask; + uint8x16_t tmp = vpaddq_u8(minput, minput); + tmp = vpaddq_u8(tmp, tmp); + tmp = vpaddq_u8(tmp, tmp); + return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); + } + simdjson_inline bool any() const { return vmaxvq_u32(vreinterpretq_u32_u8(*this)) != 0; } + }; + + // Unsigned bytes + template<> + struct simd8: base_u8 { + static simdjson_inline uint8x16_t splat(uint8_t _value) { return vmovq_n_u8(_value); } + static simdjson_inline uint8x16_t zero() { return vdupq_n_u8(0); } + static simdjson_inline uint8x16_t load(const uint8_t* values) { return vld1q_u8(values); } + + simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} + // Zero constructor + simdjson_inline simd8() : simd8(zero()) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Member-by-member initialization +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(simdjson_make_uint8x16_t( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} +#else + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t 
v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(uint8x16_t{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} +#endif + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Store to array + simdjson_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return vqaddq_u8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return vqsubq_u8(*this, other); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_u8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_u8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } + + // Order-specific operations + simdjson_inline uint8_t max_val() const { return vmaxvq_u8(*this); } + simdjson_inline uint8_t min_val() const { return vminvq_u8(*this); } + simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_u8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return vminq_u8(*this, other); } + simdjson_inline simd8 operator<=(const simd8 other) const { return vcleq_u8(*this, other); } + simdjson_inline simd8 operator>=(const simd8 other) const { return vcgeq_u8(*this, other); } + 
simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_u8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_u8(*this, other); } + // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. + simdjson_inline simd8 gt_bits(const simd8 other) const { return simd8(*this > other); } + // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. + simdjson_inline simd8 lt_bits(const simd8 other) const { return simd8(*this < other); } + + // Bit-specific operations + simdjson_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); } + simdjson_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); } + template + simdjson_inline simd8 shr() const { return vshrq_n_u8(*this, N); } + template + simdjson_inline simd8 shl() const { return vshlq_n_u8(*this, N); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint16_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. 
+ template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + uint64x2_t shufmask64 = {thintable_epi8[mask1], thintable_epi8[mask2]}; + uint8x16_t shufmask = vreinterpretq_u8_u64(shufmask64); + // we increment by 0x08 the second half of the mask +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + uint8x16_t inc = simdjson_make_uint8x16_t(0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); +#else + uint8x16_t inc = {0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; +#endif + shufmask = vaddq_u8(shufmask, inc); + // this is the version "nearly pruned" + uint8x16_t pruned = vqtbl1q_u8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + uint8x16_t compactmask = vld1q_u8(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + uint8x16_t answer = vqtbl1q_u8(pruned, compactmask); + vst1q_u8(reinterpret_cast(output), answer); + } + + // Copies all bytes corresponding to a 0 in the low half of the mask (interpreted as a + // bitset) to output1, then those corresponding to a 0 in the high half to output2. 
+ template + simdjson_inline void compress_halves(uint16_t mask, L *output1, L *output2) const { + using internal::thintable_epi8; + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + uint8x8_t compactmask1 = vcreate_u8(thintable_epi8[mask1]); + uint8x8_t compactmask2 = vcreate_u8(thintable_epi8[mask2]); + // we increment by 0x08 the second half of the mask +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + uint8x8_t inc = simdjson_make_uint8x8_t(0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); +#else + uint8x8_t inc = {0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; +#endif + compactmask2 = vadd_u8(compactmask2, inc); + // store each result (with the second store possibly overlapping the first) + vst1_u8((uint8_t*)output1, vqtbl1_u8(*this, compactmask1)); + vst1_u8((uint8_t*)output2, vqtbl1_u8(*this, compactmask2)); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + + template + simdjson_inline simd8 apply_lookup_16_to(const simd8 original) { + return vqtbl1q_u8(*this, simd8(original)); + } + }; + + // Signed bytes + template<> + struct simd8 { + int8x16_t value; + + static simdjson_inline simd8 splat(int8_t _value) { return vmovq_n_s8(_value); } + static simdjson_inline simd8 zero() { return vdupq_n_s8(0); } + static simdjson_inline simd8 load(const int8_t values[16]) { return vld1q_s8(values); } + + // Conversion from/to SIMD register + simdjson_inline simd8(const int8x16_t _value) : value{_value} {} + simdjson_inline operator const int8x16_t&() const { return 
this->value; } + simdjson_inline operator int8x16_t&() { return this->value; } + + // Zero constructor + simdjson_inline simd8() : simd8(zero()) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(simdjson_make_int8x16_t( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} +#else + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(int8x16_t{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} +#endif + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Store to array + simdjson_inline void store(int8_t dst[16]) const { return vst1q_s8(dst, *this); } + + // Explicit conversion to/from unsigned + // + // Under Visual Studio/ARM64 uint8x16_t and int8x16_t are apparently the same type. + // In theory, we could check this occurrence with std::same_as and std::enabled_if but it is C++14 + // and relatively ugly and hard to read. 
+#ifndef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline explicit simd8(const uint8x16_t other): simd8(vreinterpretq_s8_u8(other)) {} +#endif + simdjson_inline explicit operator simd8() const { return vreinterpretq_u8_s8(this->value); } + + // Math + simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_s8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_s8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_s8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return vminq_s8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_s8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_s8(*this, other); } + simdjson_inline simd8 operator==(const simd8 other) const { return vceqq_s8(*this, other); } + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return vextq_s8(prev_chunk, *this, 16 - N); + } + + // Perform a lookup assuming no value is larger than 16 + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + + template + simdjson_inline simd8 apply_lookup_16_to(const simd8 original) 
{ + return vqtbl1q_s8(*this, simd8(original)); + } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "ARM kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint64_t popcounts = vget_lane_u64(vreinterpret_u64_u8(vcnt_u8(vcreate_u8(~mask))), 0); + // compute the prefix sum of the popcounts of each byte + uint64_t offsets = popcounts * 0x0101010101010101; + this->chunks[0].compress_halves(uint16_t(mask), output, &output[popcounts & 0xFF]); + this->chunks[1].compress_halves(uint16_t(mask >> 16), &output[(offsets >> 8) & 0xFF], &output[(offsets >> 16) & 0xFF]); + this->chunks[2].compress_halves(uint16_t(mask >> 32), &output[(offsets >> 24) & 0xFF], &output[(offsets >> 32) & 0xFF]); + this->chunks[3].compress_halves(uint16_t(mask >> 48), &output[(offsets >> 40) & 0xFF], &output[(offsets >> 48) & 0xFF]); + return offsets >> 56; + } + + simdjson_inline uint64_t to_bitmask() const { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + const uint8x16_t 
bit_mask = simdjson_make_uint8x16_t( + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + ); +#else + const uint8x16_t bit_mask = { + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + }; +#endif + // Add each of the elements next to each other, successively, to stuff each 8 byte mask into one. + uint8x16_t sum0 = vpaddq_u8(this->chunks[0] & bit_mask, this->chunks[1] & bit_mask); + uint8x16_t sum1 = vpaddq_u8(this->chunks[2] & bit_mask, this->chunks[3] & bit_mask); + sum0 = vpaddq_u8(sum0, sum1); + sum0 = vpaddq_u8(sum0, sum0); + return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_SIMD_H +/* end file simdjson/arm64/simd.h */ +/* including simdjson/arm64/stringparsing_defs.h: #include "simdjson/arm64/stringparsing_defs.h" */ +/* begin file simdjson/arm64/stringparsing_defs.h */ +#ifndef SIMDJSON_ARM64_STRINGPARSING_DEFS_H +#define SIMDJSON_ARM64_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + 
+namespace simdjson { +namespace arm64 { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on ARM; therefore, we + // smash them together into a 64-byte mask and get the bitmask from there. 
+ uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_STRINGPARSING_DEFS_H +/* end file simdjson/arm64/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/arm64/begin.h */ +/* including simdjson/generic/amalgamated.h for arm64: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for arm64 */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for arm64: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for arm64 */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. 
*/ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." 
*/ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word +}; + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for arm64 */ +/* including simdjson/generic/jsoncharutils.h for arm64: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for arm64 */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// 
otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. 
+ return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for arm64 */ +/* including simdjson/generic/atomparsing.h for arm64: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace arm64 { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. 
+// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len 
> 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for arm64 */ +/* including simdjson/generic/dom_parser_implementation.h for arm64: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for arm64 */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { + +// expectation: sizeof(open_container) = 64/8. 
+struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace arm64 +} // namespace 
simdjson + +namespace simdjson { +namespace arm64 { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for arm64 */ +/* including simdjson/generic/implementation_simdjson_result_base.h for arm64: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for arm64 */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* 
amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). 
+ */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. 
+ */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for arm64 */ +/* including simdjson/generic/numberparsing.h for arm64: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for arm64 */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace arm64 { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define 
WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. 
+ // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. 
+ // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. 
+ simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. 
+ // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without + // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product + // is sufficiently accurate, and more computation is not needed. + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. 
+ // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. 
+ if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // 
might overflow, we will handle the overflow later + return true; +} + +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. 
+ + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? 
-exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { + if (parse_float_fallback(src, answer)) { + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! 
+ if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer + // reference to it, it would force it to be stored in memory, preventing the compiler from + // picking it apart and putting into registers. i.e. if we pass it as reference, + // it gets slow. + double d; + error_code error = slow_float_parsing(src, &d); + writer.append_double(d); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. 
+ return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; 
} +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' 
== *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. 
+ // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it does not fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. 
+ if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. 
+ // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. 
+ if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. 
+ if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } + // If the number is negative and valid, it must be a signed integer. + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). 
+ static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 (never reading at or past p, hence never at src_end) and see if the remaining significant digits still overflow + const uint8_t *start_digits = src + 2; + while ((start_digits != p) && (*start_digits == '0')) { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part.
+ // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for arm64 */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for arm64: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // 
SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} 
+template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for arm64 */ +/* end file simdjson/generic/amalgamated.h for arm64 */ +/* including simdjson/arm64/end.h: #include "simdjson/arm64/end.h" */ +/* begin file simdjson/arm64/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "arm64" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/arm64/end.h */ + +#endif // SIMDJSON_ARM64_H +/* end file simdjson/arm64.h */ +/* including simdjson/arm64/implementation.h: #include */ +/* begin file simdjson/arm64/implementation.h */ +#ifndef SIMDJSON_ARM64_IMPLEMENTATION_H +#define SIMDJSON_ARM64_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : 
simdjson::implementation("arm64", "ARM NEON", internal::instruction_set::NEON) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_IMPLEMENTATION_H +/* end file simdjson/arm64/implementation.h */ + +/* including simdjson/arm64/begin.h: #include */ +/* begin file simdjson/arm64/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "arm64" */ +#define SIMDJSON_IMPLEMENTATION arm64 +/* including simdjson/arm64/base.h: #include "simdjson/arm64/base.h" */ +/* begin file simdjson/arm64/base.h */ +#ifndef SIMDJSON_ARM64_BASE_H +#define SIMDJSON_ARM64_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Implementation for NEON (ARMv8). 
+ */ +namespace arm64 { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_BASE_H +/* end file simdjson/arm64/base.h */ +/* including simdjson/arm64/intrinsics.h: #include "simdjson/arm64/intrinsics.h" */ +/* begin file simdjson/arm64/intrinsics.h */ +#ifndef SIMDJSON_ARM64_INTRINSICS_H +#define SIMDJSON_ARM64_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +static_assert(sizeof(uint8x16_t) <= simdjson::SIMDJSON_PADDING, "insufficient padding for arm64"); + +#endif // SIMDJSON_ARM64_INTRINSICS_H +/* end file simdjson/arm64/intrinsics.h */ +/* including simdjson/arm64/bitmanipulation.h: #include "simdjson/arm64/bitmanipulation.h" */ +/* begin file simdjson/arm64/bitmanipulation.h */ +#ifndef SIMDJSON_ARM64_BITMANIPULATION_H +#define SIMDJSON_ARM64_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. 
+// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int count_ones(uint64_t input_num) { + return vaddv_u8(vcnt_u8(vcreate_u8(input_num))); +} + + +#if defined(__GNUC__) // catches clang and gcc +/** + * ARM has a fast 64-bit "bit reversal function" that is handy. However, + * it is not generally available as an intrinsic function under Visual + * Studio (though this might be changing). Even under clang/gcc, we + * apparently need to invoke inline assembly. + */ +/* + * We use SIMDJSON_PREFER_REVERSE_BITS as a hint that algorithms that + * work well with bit reversal may use it. 
+ */ +#define SIMDJSON_PREFER_REVERSE_BITS 1 + +/* reverse the bits */ +simdjson_inline uint64_t reverse_bits(uint64_t input_num) { + uint64_t rev_bits; + __asm("rbit %0, %1" : "=r"(rev_bits) : "r"(input_num)); + return rev_bits; +} + +/** + * Flips bit at index 63 - lz. Thus if you have 'leading_zeroes' leading zeroes, + * then this will set to zero the leading bit. It is possible for leading_zeroes to be + * greating or equal to 63 in which case we trigger undefined behavior, but the output + * of such undefined behavior is never used. + **/ +SIMDJSON_NO_SANITIZE_UNDEFINED +simdjson_inline uint64_t zero_leading_bit(uint64_t rev_bits, int leading_zeroes) { + return rev_bits ^ (uint64_t(0x8000000000000000) >> leading_zeroes); +} + +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + *result = value1 + value2; + return *result < value1; +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_BITMANIPULATION_H +/* end file simdjson/arm64/bitmanipulation.h */ +/* including simdjson/arm64/bitmask.h: #include "simdjson/arm64/bitmask.h" */ +/* begin file simdjson/arm64/bitmask.h */ +#ifndef SIMDJSON_ARM64_BITMASK_H +#define SIMDJSON_ARM64_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + ///////////// + // We could do this with PMULL, but it is apparently slow. 
+ // + //#ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension + //return vmull_p64(-1ULL, bitmask); + //#else + // Analysis by @sebpop: + // When diffing the assembly for src/stage1_find_marks.cpp I see that the eors are all spread out + // in between other vector code, so effectively the extra cycles of the sequence do not matter + // because the GPR units are idle otherwise and the critical path is on the FP side. + // Also the PMULL requires two extra fmovs: GPR->FP (3 cycles in N1, 5 cycles in A72 ) + // and FP->GPR (2 cycles on N1 and 5 cycles on A72.) + /////////// + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif +/* end file simdjson/arm64/bitmask.h */ +/* including simdjson/arm64/numberparsing_defs.h: #include "simdjson/arm64/numberparsing_defs.h" */ +/* begin file simdjson/arm64/numberparsing_defs.h */ +#ifndef SIMDJSON_ARM64_NUMBERPARSING_DEFS_H +#define SIMDJSON_ARM64_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +#if SIMDJSON_REGULAR_VISUAL_STUDIO && SIMDJSON_IS_ARM64 +// __umulh requires intrin.h +#include +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO && SIMDJSON_IS_ARM64 + +namespace simdjson { +namespace arm64 { +namespace numberparsing { + +// we don't have SSE, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t 
parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace arm64 +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_ARM64_NUMBERPARSING_DEFS_H +/* end file simdjson/arm64/numberparsing_defs.h */ +/* including simdjson/arm64/simd.h: #include "simdjson/arm64/simd.h" */ +/* begin file simdjson/arm64/simd.h */ +#ifndef SIMDJSON_ARM64_SIMD_H +#define SIMDJSON_ARM64_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace simd { + +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +namespace { +// Start of private section with Visual Studio workaround + + +#ifndef 
simdjson_make_uint8x16_t +#define simdjson_make_uint8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, \ + x13, x14, x15, x16) \ + ([=]() { \ + uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ + x9, x10, x11, x12, x13, x14, x15, x16}; \ + return vld1q_u8(array); \ + }()) +#endif +#ifndef simdjson_make_int8x16_t +#define simdjson_make_int8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, \ + x13, x14, x15, x16) \ + ([=]() { \ + int8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ + x9, x10, x11, x12, x13, x14, x15, x16}; \ + return vld1q_s8(array); \ + }()) +#endif + +#ifndef simdjson_make_uint8x8_t +#define simdjson_make_uint8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + uint8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1_u8(array); \ + }()) +#endif +#ifndef simdjson_make_int8x8_t +#define simdjson_make_int8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + int8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1_s8(array); \ + }()) +#endif +#ifndef simdjson_make_uint16x8_t +#define simdjson_make_uint16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + uint16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1q_u16(array); \ + }()) +#endif +#ifndef simdjson_make_int16x8_t +#define simdjson_make_int16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + int16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1q_s16(array); \ + }()) +#endif + +// End of private section with Visual Studio workaround +} // namespace +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO + + + template + struct simd8; + + // + // Base class of simd8 and simd8, both of which use uint8x16_t internally. 
+ // + template> + struct base_u8 { + uint8x16_t value; + static const int SIZE = sizeof(value); + + // Conversion from/to SIMD register + simdjson_inline base_u8(const uint8x16_t _value) : value(_value) {} + simdjson_inline operator const uint8x16_t&() const { return this->value; } + simdjson_inline operator uint8x16_t&() { return this->value; } + + // Bit operations + simdjson_inline simd8 operator|(const simd8 other) const { return vorrq_u8(*this, other); } + simdjson_inline simd8 operator&(const simd8 other) const { return vandq_u8(*this, other); } + simdjson_inline simd8 operator^(const simd8 other) const { return veorq_u8(*this, other); } + simdjson_inline simd8 bit_andnot(const simd8 other) const { return vbicq_u8(*this, other); } + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + simdjson_inline simd8& operator|=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline simd8& operator&=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline simd8& operator^=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast ^ other; return *this_cast; } + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return vceqq_u8(lhs, rhs); } + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return vextq_u8(prev_chunk, *this, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base_u8 { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + static simdjson_inline simd8 splat(bool _value) { return vmovq_n_u8(uint8_t(-(!!_value))); } + + simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} + // False constructor + simdjson_inline simd8() : simd8(vdupq_n_u8(0)) {} + // Splat constructor + simdjson_inline simd8(bool _value) : simd8(splat(_value)) {} + + // 
We return uint32_t instead of uint16_t because that seems to be more efficient for most + // purposes (cutting it down to uint16_t costs performance in some compilers). + simdjson_inline uint32_t to_bitmask() const { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + const uint8x16_t bit_mask = simdjson_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); +#else + const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; +#endif + auto minput = *this & bit_mask; + uint8x16_t tmp = vpaddq_u8(minput, minput); + tmp = vpaddq_u8(tmp, tmp); + tmp = vpaddq_u8(tmp, tmp); + return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); + } + simdjson_inline bool any() const { return vmaxvq_u32(vreinterpretq_u32_u8(*this)) != 0; } + }; + + // Unsigned bytes + template<> + struct simd8: base_u8 { + static simdjson_inline uint8x16_t splat(uint8_t _value) { return vmovq_n_u8(_value); } + static simdjson_inline uint8x16_t zero() { return vdupq_n_u8(0); } + static simdjson_inline uint8x16_t load(const uint8_t* values) { return vld1q_u8(values); } + + simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} + // Zero constructor + simdjson_inline simd8() : simd8(zero()) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Member-by-member initialization +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(simdjson_make_uint8x16_t( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} +#else + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t 
v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(uint8x16_t{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} +#endif + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Store to array + simdjson_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return vqaddq_u8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return vqsubq_u8(*this, other); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_u8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_u8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } + + // Order-specific operations + simdjson_inline uint8_t max_val() const { return vmaxvq_u8(*this); } + simdjson_inline uint8_t min_val() const { return vminvq_u8(*this); } + simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_u8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return vminq_u8(*this, other); } + simdjson_inline simd8 operator<=(const simd8 other) const { return vcleq_u8(*this, other); } + simdjson_inline simd8 operator>=(const simd8 other) const { return vcgeq_u8(*this, other); } + 
simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_u8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_u8(*this, other); } + // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. + simdjson_inline simd8 gt_bits(const simd8 other) const { return simd8(*this > other); } + // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. + simdjson_inline simd8 lt_bits(const simd8 other) const { return simd8(*this < other); } + + // Bit-specific operations + simdjson_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); } + simdjson_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); } + template + simdjson_inline simd8 shr() const { return vshrq_n_u8(*this, N); } + template + simdjson_inline simd8 shl() const { return vshlq_n_u8(*this, N); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint16_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. 
+ template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + uint64x2_t shufmask64 = {thintable_epi8[mask1], thintable_epi8[mask2]}; + uint8x16_t shufmask = vreinterpretq_u8_u64(shufmask64); + // we increment by 0x08 the second half of the mask +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + uint8x16_t inc = simdjson_make_uint8x16_t(0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); +#else + uint8x16_t inc = {0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; +#endif + shufmask = vaddq_u8(shufmask, inc); + // this is the version "nearly pruned" + uint8x16_t pruned = vqtbl1q_u8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + uint8x16_t compactmask = vld1q_u8(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + uint8x16_t answer = vqtbl1q_u8(pruned, compactmask); + vst1q_u8(reinterpret_cast(output), answer); + } + + // Copies all bytes corresponding to a 0 in the low half of the mask (interpreted as a + // bitset) to output1, then those corresponding to a 0 in the high half to output2. 
+ template + simdjson_inline void compress_halves(uint16_t mask, L *output1, L *output2) const { + using internal::thintable_epi8; + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + uint8x8_t compactmask1 = vcreate_u8(thintable_epi8[mask1]); + uint8x8_t compactmask2 = vcreate_u8(thintable_epi8[mask2]); + // we increment by 0x08 the second half of the mask +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + uint8x8_t inc = simdjson_make_uint8x8_t(0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); +#else + uint8x8_t inc = {0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; +#endif + compactmask2 = vadd_u8(compactmask2, inc); + // store each result (with the second store possibly overlapping the first) + vst1_u8((uint8_t*)output1, vqtbl1_u8(*this, compactmask1)); + vst1_u8((uint8_t*)output2, vqtbl1_u8(*this, compactmask2)); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + + template + simdjson_inline simd8 apply_lookup_16_to(const simd8 original) { + return vqtbl1q_u8(*this, simd8(original)); + } + }; + + // Signed bytes + template<> + struct simd8 { + int8x16_t value; + + static simdjson_inline simd8 splat(int8_t _value) { return vmovq_n_s8(_value); } + static simdjson_inline simd8 zero() { return vdupq_n_s8(0); } + static simdjson_inline simd8 load(const int8_t values[16]) { return vld1q_s8(values); } + + // Conversion from/to SIMD register + simdjson_inline simd8(const int8x16_t _value) : value{_value} {} + simdjson_inline operator const int8x16_t&() const { return 
this->value; } + simdjson_inline operator int8x16_t&() { return this->value; } + + // Zero constructor + simdjson_inline simd8() : simd8(zero()) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(simdjson_make_int8x16_t( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} +#else + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(int8x16_t{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} +#endif + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Store to array + simdjson_inline void store(int8_t dst[16]) const { return vst1q_s8(dst, *this); } + + // Explicit conversion to/from unsigned + // + // Under Visual Studio/ARM64 uint8x16_t and int8x16_t are apparently the same type. + // In theory, we could check this occurrence with std::same_as and std::enabled_if but it is C++14 + // and relatively ugly and hard to read. 
+#ifndef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline explicit simd8(const uint8x16_t other): simd8(vreinterpretq_s8_u8(other)) {} +#endif + simdjson_inline explicit operator simd8() const { return vreinterpretq_u8_s8(this->value); } + + // Math + simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_s8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_s8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_s8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return vminq_s8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_s8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_s8(*this, other); } + simdjson_inline simd8 operator==(const simd8 other) const { return vceqq_s8(*this, other); } + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return vextq_s8(prev_chunk, *this, 16 - N); + } + + // Perform a lookup assuming no value is larger than 16 + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + + template + simdjson_inline simd8 apply_lookup_16_to(const simd8 original) 
{ + return vqtbl1q_s8(*this, simd8(original)); + } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "ARM kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint64_t popcounts = vget_lane_u64(vreinterpret_u64_u8(vcnt_u8(vcreate_u8(~mask))), 0); + // compute the prefix sum of the popcounts of each byte + uint64_t offsets = popcounts * 0x0101010101010101; + this->chunks[0].compress_halves(uint16_t(mask), output, &output[popcounts & 0xFF]); + this->chunks[1].compress_halves(uint16_t(mask >> 16), &output[(offsets >> 8) & 0xFF], &output[(offsets >> 16) & 0xFF]); + this->chunks[2].compress_halves(uint16_t(mask >> 32), &output[(offsets >> 24) & 0xFF], &output[(offsets >> 32) & 0xFF]); + this->chunks[3].compress_halves(uint16_t(mask >> 48), &output[(offsets >> 40) & 0xFF], &output[(offsets >> 48) & 0xFF]); + return offsets >> 56; + } + + simdjson_inline uint64_t to_bitmask() const { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + const uint8x16_t 
bit_mask = simdjson_make_uint8x16_t( + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + ); +#else + const uint8x16_t bit_mask = { + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + }; +#endif + // Add each of the elements next to each other, successively, to stuff each 8 byte mask into one. + uint8x16_t sum0 = vpaddq_u8(this->chunks[0] & bit_mask, this->chunks[1] & bit_mask); + uint8x16_t sum1 = vpaddq_u8(this->chunks[2] & bit_mask, this->chunks[3] & bit_mask); + sum0 = vpaddq_u8(sum0, sum1); + sum0 = vpaddq_u8(sum0, sum0); + return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_SIMD_H +/* end file simdjson/arm64/simd.h */ +/* including simdjson/arm64/stringparsing_defs.h: #include "simdjson/arm64/stringparsing_defs.h" */ +/* begin file simdjson/arm64/stringparsing_defs.h */ +#ifndef SIMDJSON_ARM64_STRINGPARSING_DEFS_H +#define SIMDJSON_ARM64_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + 
+namespace simdjson { +namespace arm64 { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on ARM; therefore, we + // smash them together into a 64-byte mask and get the bitmask from there. 
+ uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_STRINGPARSING_DEFS_H +/* end file simdjson/arm64/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/arm64/begin.h */ +/* including generic/amalgamated.h for arm64: #include */ +/* begin file generic/amalgamated.h for arm64 */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_SRC_GENERIC_DEPENDENCIES_H) +#error generic/dependencies.h must be included before generic/amalgamated.h! +#endif + +/* including generic/base.h for arm64: #include */ +/* begin file generic/base.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { + +struct json_character_block; + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_BASE_H +/* end file generic/base.h for arm64 */ +/* including generic/dom_parser_implementation.h for arm64: #include */ +/* begin file generic/dom_parser_implementation.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif 
// SIMDJSON_CONDITIONAL_INCLUDE */ + +// Interface a dom parser implementation must fulfill +namespace simdjson { +namespace arm64 { +namespace { + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3); +simdjson_inline bool is_ascii(const simd8x64& input); + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file generic/dom_parser_implementation.h for arm64 */ +/* including generic/json_character_block.h for arm64: #include */ +/* begin file generic/json_character_block.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { + +struct json_character_block { + static simdjson_inline json_character_block classify(const simd::simd8x64& in); + + simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; } + simdjson_inline uint64_t op() const noexcept { return _op; } + simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } + + uint64_t _whitespace; + uint64_t _op; +}; + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H +/* end file generic/json_character_block.h for arm64 */ +/* end file generic/amalgamated.h for arm64 */ +/* including generic/stage1/amalgamated.h for arm64: #include */ +/* begin file generic/stage1/amalgamated.h for arm64 */ +// Stuff other things depend on +/* including generic/stage1/base.h for arm64: #include */ +/* begin file generic/stage1/base.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BASE_H + +/* amalgamation skipped 
(editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage1 { + +class bit_indexer; +template +struct buf_block_reader; +struct json_block; +class json_minifier; +class json_scanner; +struct json_string_block; +class json_string_scanner; +class json_structural_indexer; + +} // namespace stage1 + +namespace utf8_validation { +struct utf8_checker; +} // namespace utf8_validation + +using utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BASE_H +/* end file generic/stage1/base.h for arm64 */ +/* including generic/stage1/buf_block_reader.h for arm64: #include */ +/* begin file generic/stage1/buf_block_reader.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage1 { + +// Walks through a buffer in block-sized increments, loading the last part with spaces +template +struct buf_block_reader { +public: + simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. 
+ * + * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this + * function returns 0 and leaves dst untouched). In particular, if len == STEP_SIZE there + * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. + */ + simdjson_inline size_t get_remainder(uint8_t *dst) const; + simdjson_inline void advance(); +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text_64(const uint8_t *text) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i); i++) { + buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text(const simd8x64& in) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] < ' ') { buf[i] = '_'; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_input_text(const simd8x64& in, uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] <= ' ') { buf[i] = '_'; } + if (!(mask & (size_t(1) << i))) { buf[i] = ' '; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_mask(uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i<64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ?
0 : len - STEP_SIZE}, idx{0} {} + +template +simdjson_inline size_t buf_block_reader::block_index() { return idx; } + +template +simdjson_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} + +template +simdjson_inline const uint8_t *buf_block_reader::full_block() const { + return &buf[idx]; +} + +template +simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { + if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. + std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} + +template +simdjson_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H +/* end file generic/stage1/buf_block_reader.h for arm64 */ +/* including generic/stage1/json_escape_scanner.h for arm64: #include */ +/* begin file generic/stage1/json_escape_scanner.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage1 { + +/** + * Scans for escape characters in JSON, taking care with multiple backslashes (\\n vs. \n). + */ +struct json_escape_scanner { + /** Carry from the previous block: 1 if its last character was an escape (an escaping backslash), so this block's first character is escaped; otherwise 0. */ + uint64_t next_is_escaped = 0ULL; + + struct escaped_and_escape { + /** + * Mask of escaped characters.
+ * + * ``` + * \n \\n \\\n \\\\n \ + * 0100100010100101000 + * n \ \ n \ \ + * ``` + */ + uint64_t escaped; + /** + * Mask of escape characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 1001000101001010001 + * \ \ \ \ \ \ \ + * ``` + */ + uint64_t escape; + }; + + /** + * Get a mask of both escape and escaped characters (the characters following a backslash). + * + * @param backslash A mask of the backslash characters, which can escape others (but could be + * escaped themselves). e.g. block.eq('\\') + */ + simdjson_really_inline escaped_and_escape next(uint64_t backslash) noexcept { + +#if !SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT + if (!backslash) { return {next_escaped_without_backslashes(), 0}; } +#endif + + // | | Mask (shows characters instead of 1's) | Depth | Instructions | + // |--------------------------------|----------------------------------------|-------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | | | + // | | ` even odd even odd odd` | | | + // | potential_escape | ` \ \\\ \\\ \\\\ \\\\ \\\` | 1 | 1 (backslash & ~first_is_escaped) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 5 | 5 (next_escape_and_terminal_code()) + // | escaped | `\ \ n \ n \ \ \ \ \ ` X | 6 | 7 (escape_and_terminal_code ^ (potential_escape | first_is_escaped)) + // | escape | ` \ \ \ \ \ \ \ \ \ \` | 6 | 8 (escape_and_terminal_code & backslash) + // | first_is_escaped | `\ ` | 7 (*) | 9 (escape >> 63) () + // (*) this is not needed until the next iteration + uint64_t escape_and_terminal_code = next_escape_and_terminal_code(backslash & ~this->next_is_escaped); + uint64_t escaped = escape_and_terminal_code ^ (backslash | this->next_is_escaped); + uint64_t escape = escape_and_terminal_code & backslash; + this->next_is_escaped = escape >> 63; + return {escaped, escape}; + } + +private: + static constexpr const uint64_t ODD_BITS = 0xAAAAAAAAAAAAAAAAULL; + + simdjson_really_inline uint64_t next_escaped_without_backslashes() noexcept { + uint64_t
escaped = this->next_is_escaped; + this->next_is_escaped = 0; + return escaped; + } + + /** + * Returns a mask of the next escape characters (masking out escaped backslashes), along with + * any non-backslash escape codes. + * + * \n \\n \\\n \\\\n returns: + * \n \ \ \n \ \ + * 11 100 1011 10100 + * + * You are expected to mask out the first bit yourself if the previous block had a trailing + * escape. + * + * & the result with potential_escape to get just the escape characters. + * ^ the result with (potential_escape | first_is_escaped) to get escaped characters. + */ + static simdjson_really_inline uint64_t next_escape_and_terminal_code(uint64_t potential_escape) noexcept { + // If we were to just shift and mask out any odd bits, we'd actually get a *half* right answer: + // any even-aligned backslash runs would be correct! Odd-aligned backslash runs would be + // inverted (\\\ would be 010 instead of 101). + // + // ``` + // string: | ____\\\\_\\\\_____ | + // maybe_escaped | ODD | \ \ \ \ | + // even-aligned ^^^ ^^^^ odd-aligned + // ``` + // + // Taking that into account, our basic strategy is: + // + // 1. Use subtraction to produce a mask with 1's for even-aligned runs and 0's for + // odd-aligned runs. + // 2. XOR all odd bits, which masks out the odd bits in even-aligned runs, and brings IN the + // odd bits in odd-aligned runs. + // 3. & with backslash to clean up any stray bits. 
+ // runs are set to 0, and then XORing with "odd": + // + // | | Mask (shows characters instead of 1's) | Instructions | + // |--------------------------------|----------------------------------------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | + // | | ` even odd even odd odd` | + // | maybe_escaped | ` n \\n \\n \\\_ \\\_ \\` X | 1 (potential_escape << 1) + // | maybe_escaped_and_odd | ` \n_ \\n _ \\\n_ _ \\\__ _\\\_ \\\` | 1 (maybe_escaped | odd) + // | even_series_codes_and_odd | ` n_\\\ _ n_ _\\\\ _ _ ` | 1 (maybe_escaped_and_odd - potential_escape) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 1 (^ odd) + // + + // Escaped characters are characters following an escape. + uint64_t maybe_escaped = potential_escape << 1; + + // To distinguish odd from even escape sequences, therefore, we turn on any *starting* + // escapes that are on an odd byte. (We actually bring in all odd bits, for speed.) + // - Odd runs of backslashes are 0000, and the code at the end ("n" in \n or \\n) is 1. + // - Even runs of backslashes are 1111, and the code at the end ("n" in \n or \\n) is 0. + // - All other odd bytes are 1, and even bytes are 0. + uint64_t maybe_escaped_and_odd_bits = maybe_escaped | ODD_BITS; + uint64_t even_series_codes_and_odd_bits = maybe_escaped_and_odd_bits - potential_escape; + + // Now we flip all odd bytes back with xor.
This: + // - Makes odd runs of backslashes go from 0000 to 1010 + // - Makes even runs of backslashes go from 1111 to 1010 + // - Sets actually-escaped codes to 1 (the n in \n and \\n: \n = 11, \\n = 100) + // - Resets all other bytes to 0 + return even_series_codes_and_odd_bits ^ ODD_BITS; + } +}; + +} // namespace stage1 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H +/* end file generic/stage1/json_escape_scanner.h for arm64 */ +/* including generic/stage1/json_string_scanner.h for arm64: #include */ +/* begin file generic/stage1/json_string_scanner.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage1 { + +struct json_string_block { + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_really_inline json_string_block(uint64_t escaped, uint64_t quote, uint64_t in_string) : + _escaped(escaped), _quote(quote), _in_string(in_string) {} + + // Escaped characters (characters following an escape() character) + simdjson_really_inline uint64_t escaped() const { return _escaped; } + // Real (non-backslashed) quotes + simdjson_really_inline uint64_t quote() const { return _quote; } + // Only characters inside the string (not including the quotes) + simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t
non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + // Return a mask of whether the given characters are outside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + // Tail of string (everything except the start quote) + simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } + + // escaped characters (backslashed--does not include the hex characters after \u) + uint64_t _escaped; + // real quotes (non-escaped ones) + uint64_t _quote; + // string characters (includes start quote but not end quote) + uint64_t _in_string; +}; + +// Scans blocks for string characters, storing the state necessary to do so +class json_string_scanner { +public: + simdjson_really_inline json_string_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_really_inline error_code finish(); + +private: + // Scans for escape characters + json_escape_scanner escape_scanner{}; + // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). + uint64_t prev_in_string = 0ULL; +}; + +// +// Return a mask of all string characters plus end quotes. +// +// escape_scanner carries the overflow saying whether the next block's first character is escaped. +// prev_in_string is overflow saying whether we're still in a string. +// +// Backslash sequences outside of quotes will be detected in stage 2. +// +simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + const uint64_t backslash = in.eq('\\'); + const uint64_t escaped = escape_scanner.next(backslash).escaped; + const uint64_t quote = in.eq('"') & ~escaped; + + // + // prefix_xor flips on bits inside the string (and flips off the end quote). + // + // Then we xor with prev_in_string: if we were in a string already, its effect is flipped + // (characters inside strings are outside, and characters outside strings are inside).
+ // + const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; + + // + // Check if we're still in a string at the end of the box so the next block will know + // + prev_in_string = uint64_t(static_cast(in_string) >> 63); + + // Use ^ to turn the beginning quote off, and the end quote on. + + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_string_block(escaped, quote, in_string); +} + +simdjson_really_inline error_code json_string_scanner::finish() { + if (prev_in_string) { + return UNCLOSED_STRING; + } + return SUCCESS; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_string_scanner.h for arm64 */ +/* including generic/stage1/utf8_lookup4_algorithm.h for arm64: #include */ +/* begin file generic/stage1/utf8_lookup4_algorithm.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace utf8_validation { + +using namespace simd; + + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { +// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) +// Bit 1 = Too Long (ASCII followed by continuation) +// Bit 2 = Overlong 3-byte +// Bit 4 = Surrogate +// Bit 5 = Overlong 2-byte +// Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ + 
constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1<<6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 + ); + constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . 
+ const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, + CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 + ); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT + ); + return (byte_1_high & byte_1_low & byte_2_high); + } + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, + const simd8 prev_input, const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = must_be_2_3_continuation(prev2, prev3); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; + } + + // + // Return nonzero if there are incomplete multibyte characters at the end of the block: + // e.g. if there is a 4-byte character, but it's 3 bytes from the end. 
+ // + simdjson_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): + // ... 1111____ 111_____ 11______ +#if SIMDJSON_IMPLEMENTATION_ICELAKE + static const uint8_t max_array[64] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#else + static const uint8_t max_array[32] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#endif + const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); + return input.gt_bits(max_value); + } + + struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. + // + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes + // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + // The only problem that can happen at EOF is that a multibyte character is too short + // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // too large in the first of two bytes. 
+ simdjson_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; + } + + simdjson_inline void check_next_input(const simd8x64& input) { + if(simdjson_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 1) + ||(simd8x64::NUM_CHUNKS == 2) + || (simd8x64::NUM_CHUNKS == 4), + "We support one, two or four chunks per 64-byte block."); + SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; + } + } + // do not forget to call check_eof! + simdjson_inline error_code errors() { + return this->error.any_bits_set_anywhere() ? 
error_code::UTF8_ERROR : error_code::SUCCESS; + } + + }; // struct utf8_checker +} // namespace utf8_validation + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H +/* end file generic/stage1/utf8_lookup4_algorithm.h for arm64 */ +/* including generic/stage1/json_scanner.h for arm64: #include */ +/* begin file generic/stage1/json_scanner.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage1 { + +/** + * A block of scanned json, with information on operators and scalars. + * + * We seek to identify pseudo-structural characters. Anything that is inside + * a string must be omitted (hence & ~_string.string_tail()). + * Otherwise, pseudo-structural characters come in two forms. + * 1. We have the structural characters ([,],{,},:, comma). The + * term 'structural character' is from the JSON RFC. + * 2. We have the 'scalar pseudo-structural characters'. + * Scalars are quotes, and any character except structural characters and white space. + * + * To identify the scalar pseudo-structural characters, we must look at what comes + * before them: it must be a space, a quote or a structural characters. + * Starting with simdjson v0.3, we identify them by + * negation: we identify everything that is followed by a non-quote scalar, + * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. 
+ */ +struct json_block { +public: + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + + /** + * The start of structurals. + * In simdjson prior to v0.3, these were called the pseudo-structural characters. + **/ + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + /** All JSON whitespace (i.e. not in a string) */ + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + + // Helpers + + /** Whether the given characters are inside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + /** Whether the given characters are outside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + + // string and escape characters + json_string_block _string; + // whitespace, structural characters ('operators'), scalars + json_character_block _characters; + // whether the previous character was a scalar + uint64_t _follows_potential_nonquote_scalar; +private: + // Potential structurals (i.e. disregarding strings) + + /** + * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". + * They may reside inside a string. 
+ **/ + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + /** + * The start of non-operator runs, like 123, true and "abc". + * It main reside inside a string. + **/ + simdjson_inline uint64_t potential_scalar_start() const noexcept { + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space + // then we know that it is irrelevant structurally. + return _characters.scalar() & ~follows_potential_scalar(); + } + /** + * Whether the given character is immediately after a non-operator like 123, true. + * The characters following a quote are not included. + */ + simdjson_inline uint64_t follows_potential_scalar() const noexcept { + // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character + // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a + // white space. + // It is understood that within quoted region, anything at all could be marked (irrelevant). + return _follows_potential_nonquote_scalar; + } +}; + +/** + * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. + * + * The scanner starts by calculating two distinct things: + * - string characters (taking \" into account) + * - structural characters or 'operators' ([]{},:, comma) + * and scalars (runs of non-operators like 123, true and "abc") + * + * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: + * in particular, the operator/scalar bit will find plenty of things that are actually part of + * strings. When we're done, json_block will fuse the two together by masking out tokens that are + * part of a string. 
+ */ +class json_scanner { +public: + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_inline error_code finish(); + +private: + // Whether the last character of the previous iteration is part of a scalar token + // (anything except whitespace or a structural character/'operator'). + uint64_t prev_scalar = 0ULL; + json_string_scanner string_scanner{}; +}; + + +// +// Check if the current character immediately follows a matching character. +// +// For example, this checks for quotes with backslashes in front of them: +// +// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); +// +simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { + const uint64_t result = match << 1 | overflow; + overflow = match >> 63; + return result; +} + +simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { + json_string_block strings = string_scanner.next(in); + // identifies the white-space and the structural characters + json_character_block characters = json_character_block::classify(in); + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). + // + // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) + // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential + // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we + // may need to add an extra check when parsing strings. + // + // Performance: there are many ways to skin this cat. 
+ const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); + uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_block( + strings,// strings is a function-local object so either it moves or the copy is elided. + characters, + follows_nonquote_scalar + ); +} + +simdjson_inline error_code json_scanner::finish() { + return string_scanner.finish(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H +/* end file generic/stage1/json_scanner.h for arm64 */ + +// All other declarations +/* including generic/stage1/find_next_document_index.h for arm64: #include */ +/* begin file generic/stage1/find_next_document_index.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage1 { + +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. 
We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! + if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document. 
+ switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } + return 0; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H +/* end file generic/stage1/find_next_document_index.h for arm64 */ +/* including generic/stage1/json_minifier.h for arm64: #include */ +/* begin file generic/stage1/json_minifier.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage1 { + +class json_minifier { +public: + template + static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; + +private: + simdjson_inline json_minifier(uint8_t *_dst) + : dst{_dst} + {} + template + simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + json_scanner scanner{}; + 
uint8_t *dst; +}; + +simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + uint64_t mask = block.whitespace(); + dst += in.compress(mask, dst); +} + +simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { + error_code error = scanner.finish(); + if (error) { dst_len = 0; return error; } + dst_len = dst - dst_start; + return SUCCESS; +} + +template<> +simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + simd::simd8x64 in_2(block_buf+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1); + this->next(in_2, block_2); + reader.advance(); +} + +template<> +simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + json_block block_1 = scanner.next(in_1); + this->next(block_buf, block_1); + reader.advance(); +} + +template +error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { + buf_block_reader reader(buf, len); + json_minifier minifier(dst); + + // Index the first n-1 blocks + while (reader.has_full_block()) { + minifier.step(reader.full_block(), reader); + } + + // Index the last (remainder) block, padded with spaces + uint8_t block[STEP_SIZE]; + size_t remaining_bytes = reader.get_remainder(block); + if (remaining_bytes > 0) { + // We do not want to write directly to the output stream. Rather, we write + // to a local buffer (for safety). + uint8_t out_block[STEP_SIZE]; + uint8_t * const guarded_dst{minifier.dst}; + minifier.dst = out_block; + minifier.step(block, reader); + size_t to_write = minifier.dst - out_block; + // In some cases, we could be enticed to consider the padded spaces + // as part of the string. This is fine as long as we do not write more + // than we consumed. 
+ if(to_write > remaining_bytes) { to_write = remaining_bytes; } + memcpy(guarded_dst, out_block, to_write); + minifier.dst = guarded_dst + to_write; + } + return minifier.finish(dst, dst_len); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H +/* end file generic/stage1/json_minifier.h for arm64 */ +/* including generic/stage1/json_structural_indexer.h for arm64: #include */ +/* begin file generic/stage1/json_structural_indexer.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage1 { + +class bit_indexer { +public: + uint32_t *tail; + + simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + +#if SIMDJSON_PREFER_REVERSE_BITS + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction. 
+ * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. However, + * it tends to minimize the total number of instructions which is + * beneficial. + */ + simdjson_inline void write_index(uint32_t idx, uint64_t& rev_bits, int i) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } +#else + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. + */ + + simdjson_inline void write_index(uint32_t idx, uint64_t& bits, int i) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } +#endif // SIMDJSON_PREFER_REVERSE_BITS + + template + simdjson_inline int write_indexes(uint32_t idx, uint64_t& bits) { + write_index(idx, bits, START); + SIMDJSON_IF_CONSTEXPR (N > 1) { + write_indexes<(N-1>0?START+1:START), (N-1>=0?N-1:1)>(idx, bits); + } + return START+N; + } + + template + simdjson_inline int write_indexes_stepped(uint32_t idx, uint64_t& bits, int cnt) { + write_indexes(idx, bits); + SIMDJSON_IF_CONSTEXPR ((START+STEP) < END) { + if (simdjson_unlikely((START+STEP) < cnt)) { + write_indexes_stepped<(START+STEP(idx, bits, cnt); + } + } + return ((END-START) % STEP) == 0 ? 
END : (END-START) - ((END-START) % STEP) + STEP; + } + + // flatten out values in 'bits' assuming that they are are to have values of idx + // plus their position in the bitvector, and store these indexes at + // base_ptr[base] incrementing base as we go + // will potentially store extra values beyond end of valid bits, so base_ptr + // needs to be large enough to handle this + // + // If the kernel sets SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER, then it + // will provide its own version of the code. +#ifdef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + simdjson_inline void write(uint32_t idx, uint64_t bits); +#else + simdjson_inline void write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; + + int cnt = static_cast(count_ones(bits)); + +#if SIMDJSON_PREFER_REVERSE_BITS + bits = reverse_bits(bits); +#endif +#ifdef SIMDJSON_STRUCTURAL_INDEXER_STEP + static constexpr const int STEP = SIMDJSON_STRUCTURAL_INDEXER_STEP; +#else + static constexpr const int STEP = 4; +#endif + static constexpr const int STEP_UNTIL = 24; + + write_indexes_stepped<0, STEP_UNTIL, STEP>(idx, bits, cnt); + SIMDJSON_IF_CONSTEXPR (STEP_UNTIL < 64) { + if (simdjson_unlikely(STEP_UNTIL < cnt)) { + for (int i=STEP_UNTIL; itail += cnt; + } +#endif // SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +}; + +class json_structural_indexer { +public: + /** + * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. + * + * @param partial Setting the partial parameter to true allows the find_structural_bits to + * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If + * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. 
+ */ + template + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; + +private: + simdjson_inline json_structural_indexer(uint32_t *structural_indexes); + template + simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + + json_scanner scanner{}; + utf8_checker checker{}; + bit_indexer indexer; + uint64_t prev_structurals = 0; + uint64_t unescaped_chars_error = 0; +}; + +simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} + +// Skip the last character if it is partial +simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { + if (simdjson_unlikely(len < 3)) { + switch (len) { + case 2: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + return len; + case 1: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + return len; + case 0: + return len; + } + } + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left + return len; +} + +// +// PERF NOTES: +// We pipe 2 inputs through these stages: +// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load +// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. +// 2. Scan the JSON for critical data: strings, scalars and operators. 
This is the critical path. +// The output of step 1 depends entirely on this information. These functions don't quite use +// up enough CPU: the second half of the functions is highly serial, only using 1 execution core +// at a time. The second input's scans has some dependency on the first ones finishing it, but +// they can make a lot of progress before they need that information. +// 3. Step 1 does not use enough capacity, so we run some extra stuff while we're waiting for that +// to finish: utf-8 checks and generating the output from the last iteration. +// +// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all +// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough +// workout. +// +template +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { + if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. + if(len == 0) { return UTF8_ERROR; } + } + buf_block_reader reader(buf, len); + json_structural_indexer indexer(parser.structural_indexes.get()); + + // Read all but the last block + while (reader.has_full_block()) { + indexer.step(reader.full_block(), reader); + } + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) 
+ uint8_t block[STEP_SIZE]; + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } + indexer.step(block, reader); + return indexer.finish(parser, reader.block_index(), len, partial); +} + +template<> +simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block); + simd::simd8x64 in_2(block+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1, reader.block_index()); + this->next(in_2, block_2, reader.block_index()+64); + reader.advance(); +} + +template<> +simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block); + json_block block_1 = scanner.next(in_1); + this->next(in_1, block_1, reader.block_index()); + reader.advance(); +} + +simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + uint64_t unescaped = in.lteq(0x1F); +#if SIMDJSON_UTF8VALIDATION + checker.check_next_input(in); +#endif + indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser + prev_structurals = block.structural_start(); + unescaped_chars_error |= block.non_quote_inside_string(unescaped); +} + +simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { + // Write out the final iteration's structurals + indexer.write(uint32_t(idx-64), prev_structurals); + error_code error = scanner.finish(); + // We deliberately break down the next expression so that it is + // human readable. + const bool should_we_exit = is_streaming(partial) ? 
+ ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING + : (error != SUCCESS); // if partial is false, we must have SUCCESS + const bool have_unclosed_string = (error == UNCLOSED_STRING); + if (simdjson_unlikely(should_we_exit)) { return error; } + + if (unescaped_chars_error) { + return UNESCAPED_CHARS; + } + parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); + /*** + * The On Demand API requires special padding. + * + * This is related to https://github.com/simdjson/simdjson/issues/906 + * Basically, we want to make sure that if the parsing continues beyond the last (valid) + * structural character, it quickly stops. + * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. + * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing + * continues, then it must be [,] or }. + * Suppose it is ] or }. We backtrack to the first character, what could it be that would + * not trigger an error? It could be ] or } but no, because you can't start a document that way. + * It can't be a comma, a colon or any simple value. So the only way we could continue is + * if the repeated character is [. But if so, the document must start with [. But if the document + * starts with [, it should end with ]. If we enforce that rule, then we would get + * ][[ which is invalid. 
+ * + * This is illustrated with the test array_iterate_unclosed_error() on the following input: + * R"({ "a": [,,)" + **/ + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final + parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); + parser.structural_indexes[parser.n_structural_indexes + 2] = 0; + parser.next_structural_index = 0; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + return EMPTY; + } + if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { + return UNEXPECTED_ERROR; + } + if (partial == stage1_mode::streaming_partial) { + // If we have an unclosed string, then the last structural + // will be the quote and we want to make sure to omit it. + if(have_unclosed_string) { + parser.n_structural_indexes--; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } + } + // We truncate the input to the end of the last complete document (or zero). + auto new_structural_indexes = find_next_document_index(parser); + if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } + + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). 
+ // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. + return EMPTY; + } + } + checker.check_eof(); + return checker.errors(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +// Clear CUSTOM_BIT_INDEXER so other implementations can set it if they need to. 
+#undef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H +/* end file generic/stage1/json_structural_indexer.h for arm64 */ +/* including generic/stage1/utf8_validator.h for arm64: #include */ +/* begin file generic/stage1/utf8_validator.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage1 { + +/** + * Validates that the string is actual UTF-8. + */ +template +bool generic_validate_utf8(const uint8_t * input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return c.errors() == error_code::SUCCESS; +} + +bool generic_validate_utf8(const char * input, size_t length) { + return generic_validate_utf8(reinterpret_cast(input),length); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H +/* end file generic/stage1/utf8_validator.h for arm64 */ +/* end file generic/stage1/amalgamated.h for arm64 */ +/* including generic/stage2/amalgamated.h for arm64: #include */ +/* begin file generic/stage2/amalgamated.h for arm64 */ +// Stuff other things depend on +/* including generic/stage2/base.h for arm64: 
#include */ +/* begin file generic/stage2/base.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage2 { + +class json_iterator; +class structural_iterator; +struct tape_builder; +struct tape_writer; + +} // namespace stage2 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_BASE_H +/* end file generic/stage2/base.h for arm64 */ +/* including generic/stage2/tape_writer.h for arm64: #include */ +/* begin file generic/stage2/tape_writer.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage2 { + +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; + + /** Write a signed 64-bit value to tape. */ + simdjson_inline void append_s64(int64_t value) noexcept; + + /** Write an unsigned 64-bit value to tape. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + + /** Write a double value to tape. */ + simdjson_inline void append_double(double value) noexcept; + + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). 
+ */ + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; + + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_inline void skip() noexcept; + + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_inline void skip_large_integer() noexcept; + + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_inline void skip_double() noexcept; + + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. + */ + template + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct tape_writer + +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} + +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} + +/** Write a double value to tape. 
*/ +simdjson_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} + +simdjson_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} + +simdjson_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} + +template +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} + +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} + +} // namespace stage2 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H +/* end file generic/stage2/tape_writer.h for arm64 */ +/* including generic/stage2/logger.h for arm64: #include */ +/* begin file generic/stage2/logger.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + + +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! 
+namespace simdjson { +namespace arm64 { +namespace { +namespace logger { + + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; + + static int log_depth; // Not threadsafe. Log only. + + // Helper to turn unprintable or newline characters into spaces + static simdjson_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } + } + + // Print the header and set up log_start + static simdjson_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } + } + + simdjson_unused static simdjson_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } + + // Logs a single line from the stage 2 DOM parser + template + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? 
nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i */ +/* begin file generic/stage2/json_iterator.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage2 { + +class json_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + uint32_t depth{0}; + + /** + * Walk the JSON document. + * + * The visitor receives callbacks when values are encountered. All callbacks pass the iterator as + * the first parameter; some callbacks have other parameters as well: + * + * - visit_document_start() - at the beginning. + * - visit_document_end() - at the end (if things were successful). + * + * - visit_array_start() - at the start `[` of a non-empty array. + * - visit_array_end() - at the end `]` of a non-empty array. + * - visit_empty_array() - when an empty array is encountered. + * + * - visit_object_end() - at the start `]` of a non-empty object. + * - visit_object_start() - at the end `]` of a non-empty object. + * - visit_empty_object() - when an empty object is encountered. 
+ * - visit_key(const uint8_t *key) - when a key in an object field is encountered. key is + * guaranteed to point at the first quote of the string (`"key"`). + * - visit_primitive(const uint8_t *value) - when a value is a string, number, boolean or null. + * - visit_root_primitive(iter, uint8_t *value) - when the top-level value is a string, number, boolean or null. + * + * - increment_count(iter) - each time a value is found in an array or object. + */ + template + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; + + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + + /** + * Look at the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; + + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. 
+ */ + simdjson_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_error(const char *error) const noexcept; + + template + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); + + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); + + // + // Read first value + // + { + auto value = advance(); + + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. 
See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } + } + goto document_end; + +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); + + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { 
log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } + +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; + +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); + +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + } + +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); + + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); + + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters 
at the end of the JSON!"); + return TAPE_ERROR; + } + + return SUCCESS; + +} // walk_document() + +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { +} + +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; +} +simdjson_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); +} + +simdjson_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { + return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} + +simdjson_inline void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} + +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} + +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} + +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, 
value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + // Use the fact that most scalars are going to be either strings or numbers. + if(*value == '"') { + return visitor.visit_string(*this, value); + } else if (((*value - '0') < 10) || (*value == '-')) { + return visitor.visit_number(*this, value); + } + // true, false, null are uncommon. + switch (*value) { + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + default: + log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } +} + +} // namespace stage2 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H +/* end file generic/stage2/json_iterator.h for arm64 */ +/* including generic/stage2/stringparsing.h for arm64: #include */ +/* begin file generic/stage2/stringparsing.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses 
+// It is intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace arm64 { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr, bool allow_replacement) { + // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) + constexpr uint32_t substitution_code_point = 0xfffd; + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this to inside the + // 
multilingual plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + + } + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. 
+ if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +// handle a unicode codepoint using the wobbly convention +// https://simonsapin.github.io/wtf-8/ +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // It is not ideal that this function is nearly identical to handle_unicode_codepoint. + // + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this to inside the + // multilingual plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + uint32_t low_bit = code_point_2 - 0xdc00; + if ((low_bit >> 10) == 0) { + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + } + } + + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. 
There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_ptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. 
*/ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } +} + +simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) { + // It is not ideal that this function is nearly identical to parse_string. + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint_wobbly(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. 
*/ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H +/* end file generic/stage2/stringparsing.h for arm64 */ +/* including generic/stage2/structural_iterator.h for arm64: #include */ +/* begin file generic/stage2/structural_iterator.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage2 { + +class structural_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + + // Start a structural + simdjson_inline structural_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { + } + // Get the buffer position of the current structural character + simdjson_inline const uint8_t* current() { + return &buf[*(next_structural-1)]; + } + // Get the current structural character + simdjson_inline char current_char() { + return buf[*(next_structural-1)]; + } + // Get the next structural character without advancing + simdjson_inline char peek_next_char() { + return buf[*next_structural]; + } + simdjson_inline const uint8_t* peek() { + return &buf[*next_structural]; + } + simdjson_inline const uint8_t* advance() { + return &buf[*(next_structural++)]; + } + simdjson_inline char advance_char() { + return 
buf[*(next_structural++)]; + } + simdjson_inline size_t remaining_len() { + return dom_parser.len - *(next_structural-1); + } + + simdjson_inline bool at_end() { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; + } + simdjson_inline bool at_beginning() { + return next_structural == dom_parser.structural_indexes.get(); + } +}; + +} // namespace stage2 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H +/* end file generic/stage2/structural_iterator.h for arm64 */ +/* including generic/stage2/tape_builder.h for arm64: #include */ +/* begin file generic/stage2/tape_builder.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage2 { + +struct tape_builder { + template + simdjson_warn_unused static simdjson_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; + + /** Called when a non-empty document starts. */ + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. 
*/ + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; + + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. + */ + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; + + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. + * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. 
+ */ + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + /** Called each time a new field or element in an array or object is found. 
*/ + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; + + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; + + simdjson_inline tape_builder(dom::document &doc) noexcept; + + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; +}; // struct tape_builder + +template +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { + constexpr uint32_t start_tape_index = 0; + 
tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} + +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst, false); // We do not allow replacement when the escape characters are invalid. + if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; + } + on_end_string(dst); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { + // + // We need to make a copy to make sure that the string is space terminated. + // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. 
However, we have no control at this stage + // on how the padding was done. What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. + // + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code 
tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +// private: + +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} + +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. 
+} + +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? 
It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} + +} // namespace stage2 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H +/* end file generic/stage2/tape_builder.h for arm64 */ +/* end file generic/stage2/amalgamated.h for arm64 */ + +// +// Stage 1 +// +namespace simdjson { +namespace arm64 { + +simdjson_warn_unused error_code implementation::create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr& dst +) const noexcept { + dst.reset( new (std::nothrow) dom_parser_implementation() ); + if (!dst) { return MEMALLOC; } + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; + return SUCCESS; +} + +namespace { + +using namespace simd; + +simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { + // Functional programming causes trouble with Visual Studio. + // Keeping this version in comments since it is much nicer: + // auto v = in.map([&](simd8 chunk) { + // auto nib_lo = chunk & 0xf; + // auto nib_hi = chunk.shr<4>(); + // auto shuf_lo = nib_lo.lookup_16(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); + // auto shuf_hi = nib_hi.lookup_16(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0); + // return shuf_lo & shuf_hi; + // }); + const simd8 table1(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); + const simd8 table2(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0); + + simd8x64 v( + (in.chunks[0] & 0xf).lookup_16(table1) & (in.chunks[0].shr<4>()).lookup_16(table2), + (in.chunks[1] & 0xf).lookup_16(table1) & (in.chunks[1].shr<4>()).lookup_16(table2), + (in.chunks[2] & 0xf).lookup_16(table1) & (in.chunks[2].shr<4>()).lookup_16(table2), + (in.chunks[3] & 0xf).lookup_16(table1) & (in.chunks[3].shr<4>()).lookup_16(table2) + ); + + + // We compute whitespace and op separately. 
If the code later only use one or the + // other, given the fact that all functions are aggressively inlined, we can + // hope that useless computations will be omitted. This is namely case when + // minifying (we only need whitespace). *However* if we only need spaces, + // it is likely that we will still compute 'v' above with two lookup_16: one + // could do it a bit cheaper. This is in contrast with the x64 implementations + // where we can, efficiently, do the white space and structural matching + // separately. One reason for this difference is that on ARM NEON, the table + // lookups either zero or leave unchanged the characters exceeding 0xF whereas + // on x64, the equivalent instruction (pshufb) automatically applies a mask, + // ignoring the 4 most significant bits. Thus the x64 implementation is + // optimized differently. This being said, if you use this code strictly + // just for minification (or just to identify the structural characters), + // there is a small untaken optimization opportunity here. We deliberately + // do not pick it up. + + uint64_t op = simd8x64( + v.chunks[0].any_bits_set(0x7), + v.chunks[1].any_bits_set(0x7), + v.chunks[2].any_bits_set(0x7), + v.chunks[3].any_bits_set(0x7) + ).to_bitmask(); + + uint64_t whitespace = simd8x64( + v.chunks[0].any_bits_set(0x18), + v.chunks[1].any_bits_set(0x18), + v.chunks[2].any_bits_set(0x18), + v.chunks[3].any_bits_set(0x18) + ).to_bitmask(); + + return { whitespace, op }; +} + +simdjson_inline bool is_ascii(const simd8x64& input) { + simd8 bits = input.reduce_or(); + return bits.max_val() < 0x80u; +} + +simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1 >= uint8_t(0xc0u); + simd8 is_third_byte = prev2 >= uint8_t(0xe0u); + simd8 is_fourth_byte = prev3 >= uint8_t(0xf0u); + // Use ^ instead of | for is_*_byte, because ^ is commutative, and the caller is using ^ as well. 
+ // This will work fine because we only have to report errors for cases with 0-1 lead bytes. + // Multiple lead bytes implies 2 overlapping multibyte characters, and if that happens, there is + // guaranteed to be at least *one* lead byte that is part of only 1 other multibyte character. + // The error will be detected there. + return is_second_byte ^ is_third_byte ^ is_fourth_byte; +} + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0xe0u-0x80); // Only 111_____ will be >= 0x80 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-0x80); // Only 1111____ will be >= 0x80 + return is_third_byte | is_fourth_byte; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +// +// Stage 2 +// + +// +// Implementation-specific overrides +// +namespace simdjson { +namespace arm64 { + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + return arm64::stage1::json_minifier::minify<64>(buf, len, dst, dst_len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { + this->buf = _buf; + this->len = _len; + return arm64::stage1::json_structural_indexer::index<64>(buf, len, *this, streaming); +} + +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return arm64::stage1::generic_validate_utf8(buf,len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst, bool 
allow_replacement) const noexcept { + return arm64::stringparsing::parse_string(src, dst, allow_replacement); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept { + return arm64::stringparsing::parse_wobbly_string(src, dst); +} + +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { return error; } + return stage2(_doc); +} + +} // namespace arm64 +} // namespace simdjson + +/* including simdjson/arm64/end.h: #include */ +/* begin file simdjson/arm64/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "arm64" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/arm64/end.h */ + +#endif // SIMDJSON_SRC_ARM64_CPP +/* end file arm64.cpp */ +#endif +#if SIMDJSON_IMPLEMENTATION_HASWELL +/* including haswell.cpp: #include */ +/* begin file haswell.cpp */ +#ifndef SIMDJSON_SRC_HASWELL_CPP +#define SIMDJSON_SRC_HASWELL_CPP + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* including simdjson/haswell.h: #include */ +/* begin file simdjson/haswell.h */ +#ifndef SIMDJSON_HASWELL_H +#define SIMDJSON_HASWELL_H + +/* including simdjson/haswell/begin.h: #include "simdjson/haswell/begin.h" */ +/* begin file simdjson/haswell/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "haswell" */ +#define SIMDJSON_IMPLEMENTATION haswell + +/* including simdjson/haswell/base.h: #include "simdjson/haswell/base.h" 
*/ +/* begin file simdjson/haswell/base.h */ +#ifndef SIMDJSON_HASWELL_BASE_H +#define SIMDJSON_HASWELL_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL +namespace simdjson { +/** + * Implementation for Haswell (Intel AVX2). + */ +namespace haswell { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BASE_H +/* end file simdjson/haswell/base.h */ +/* including simdjson/haswell/intrinsics.h: #include "simdjson/haswell/intrinsics.h" */ +/* begin file simdjson/haswell/intrinsics.h */ +#ifndef SIMDJSON_HASWELL_INTRINSICS_H +#define SIMDJSON_HASWELL_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) 
which compiles the whole + * software with these advanced instructions. In simdjson, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. + */ +#include // for _blsr_u64 +#include // for __lzcnt64 +#include // for most things (AVX2, AVX512, _popcnt64) +#include +#include +#include +#include +#include // for _mm_clmulepi64_si128 +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. +#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO + +static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for haswell kernel."); + +#endif // SIMDJSON_HASWELL_INTRINSICS_H +/* end file simdjson/haswell/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL +SIMDJSON_TARGET_REGION("avx2,bmi,pclmul,lzcnt,popcnt") +#endif + +/* including simdjson/haswell/bitmanipulation.h: #include "simdjson/haswell/bitmanipulation.h" */ +/* begin file simdjson/haswell/bitmanipulation.h */ +#ifndef SIMDJSON_HASWELL_BITMANIPULATION_H +#define SIMDJSON_HASWELL_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmask.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. 
+SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? + //////// + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return _blsr_u64(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return int(_lzcnt_u64(input_num)); +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BITMANIPULATION_H +/* end file simdjson/haswell/bitmanipulation.h */ +/* including simdjson/haswell/bitmask.h: #include "simdjson/haswell/bitmask.h" */ +/* begin file simdjson/haswell/bitmask.h */ +#ifndef SIMDJSON_HASWELL_BITMASK_H +#define SIMDJSON_HASWELL_BITMASK_H + +/* 
amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BITMASK_H +/* end file simdjson/haswell/bitmask.h */ +/* including simdjson/haswell/numberparsing_defs.h: #include "simdjson/haswell/numberparsing_defs.h" */ +/* begin file simdjson/haswell/numberparsing_defs.h */ +#ifndef SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H +#define SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace numberparsing { + +/** @private */ +static simdjson_inline uint32_t 
parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. + const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace haswell +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H +/* end file simdjson/haswell/numberparsing_defs.h */ +/* including simdjson/haswell/simd.h: #include "simdjson/haswell/simd.h" */ +/* begin file simdjson/haswell/simd.h */ +#ifndef SIMDJSON_HASWELL_SIMD_H +#define SIMDJSON_HASWELL_SIMD_H + +/* amalgamation skipped (editor-only): 
#ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m256i value; + + // Zero constructor + simdjson_inline base() : value{__m256i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m256i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m256i&() const { return this->value; } + simdjson_inline operator __m256i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm256_or_si256(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. 
+ template + struct simd8; + + template> + struct base8: base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m256i _value) : base>(_value) {} + + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm256_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm256_alignr_epi8(*this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m256i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm256_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm256_testz_si256(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm256_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm256_setzero_si256(); } + static simdjson_inline simd8 load(const T values[32]) { + return _mm256_loadu_si256(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline 
base8_numeric(const __m256i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[32]) const { return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm256_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm256_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm256_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint32_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in four steps, first 8 bytes and then second 8 bytes... 
+ uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits + uint8_t mask3 = uint8_t(mask >> 16); // ... + uint8_t mask4 = uint8_t(mask >> 24); // ... + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m256i shufmask = _mm256_set_epi64x(thintable_epi8[mask4], thintable_epi8[mask3], + thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask and so forth + shufmask = + _mm256_add_epi8(shufmask, _mm256_set_epi32(0x18181818, 0x18181818, + 0x10101010, 0x10101010, 0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m256i pruned = _mm256_shuffle_epi8(*this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + int pop3 = BitsSetTable256mul2[mask3]; + + // then load the corresponding mask + // could be done with _mm256_loadu2_m128i but many standard libraries omit this intrinsic. + __m256i v256 = _mm256_castsi128_si256( + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8))); + __m256i compactmask = _mm256_insertf128_si256(v256, + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop3 * 8)), 1); + __m256i almostthere = _mm256_shuffle_epi8(pruned, compactmask); + // We just need to write out the result. + // This is the tricky bit that is hard to do + // if we want to return a SIMD register, since there + // is no single-instruction approach to recombine + // the two 128-bit lanes with an offset. 
+ __m128i v128; + v128 = _mm256_castsi256_si128(almostthere); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output), v128); + v128 = _mm256_extractf128_si256(almostthere, 1); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output + 16 - count_ones(mask & 0xFFFF)), v128); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 + ) : simd8(_mm256_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, 
int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm256_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm256_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 + ) : simd8(_mm256_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t 
v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm256_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm256_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epu8(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); } + simdjson_inline 
bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm256_testz_si256(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 2, "Haswell kernel should use two registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint32_t mask1 = uint32_t(mask); + uint32_t mask2 = uint32_t(mask >> 32); + this->chunks[0].compress(mask1, output); + this->chunks[1].compress(mask2, output + 32 - count_ones(mask1)); + return 64 - count_ones(mask); + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r_hi = this->chunks[1].to_bitmask(); + return r_lo | (r_hi << 32); + } + + simdjson_inline simd8 
reduce_or() const { + return this->chunks[0] | this->chunks[1]; + } + + simdjson_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] | mask, + this->chunks[1] | mask + ); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_SIMD_H +/* end file simdjson/haswell/simd.h */ +/* including simdjson/haswell/stringparsing_defs.h: #include "simdjson/haswell/stringparsing_defs.h" */ +/* begin file simdjson/haswell/stringparsing_defs.h */ +#ifndef SIMDJSON_HASWELL_STRINGPARSING_DEFS_H +#define SIMDJSON_HASWELL_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 15 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + // store to dest unconditionally - we can overwrite the bits we don't like later + v.store(dst); + return { + static_cast((v == '\\').to_bitmask()), // bs_bits + static_cast((v == '"').to_bitmask()), // quote_bits + }; +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_STRINGPARSING_DEFS_H +/* end file simdjson/haswell/stringparsing_defs.h */ +/* end file simdjson/haswell/begin.h */ +/* including simdjson/generic/amalgamated.h for haswell: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for haswell */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! 
+#endif + +/* including simdjson/generic/base.h for haswell: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for haswell */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */ +/* amalgamation skipped (editor-only): #elif 
SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word +}; + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for haswell */ +/* including simdjson/generic/jsoncharutils.h for haswell: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for haswell */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero 
otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... 
+ //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for haswell */ +/* including simdjson/generic/atomparsing.h for haswell: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for haswell */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace haswell { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. 
+simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for haswell */ +/* including simdjson/generic/dom_parser_implementation.h for haswell: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file 
simdjson/generic/dom_parser_implementation.h for haswell */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { + +// expectation: sizeof(open_container) = 64/8. +struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) 
noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace haswell +} // namespace simdjson + +namespace simdjson { +namespace haswell { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace 
haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for haswell */ +/* including simdjson/generic/implementation_simdjson_result_base.h for haswell: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for haswell */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. 
+ */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. 
This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for haswell */ +/* including simdjson/generic/numberparsing.h for haswell: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for haswell */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace haswell { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) 
(found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." 
+#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. 
Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. 
+ const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. 
Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without + // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product + // is sufficiently accurate, and more computation is not needed. + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? 
-0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. 
+ // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. 
+ + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! 
+ // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! 
+ if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? 
-exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { + if (parse_float_fallback(src, answer)) { + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! 
+ if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer + // reference to it, it would force it to be stored in memory, preventing the compiler from + // picking it apart and putting into registers. i.e. if we pass it as reference, + // it gets slow. + double d; + error_code error = slow_float_parsing(src, &d); + writer.append_double(d); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. 
+ return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; 
} +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' 
== *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. 
+ // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it does not fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. 
+ if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. 
+ // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. 
+ if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. 
+ if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } + // If the number is negative and valid, it must be a signed integer. + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). 
+ static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 (bounded by p, so we never read at src_end) and see if the remaining significant digits still overflow + const uint8_t *start_digits = src + 2; + while ((start_digits != p) && (*start_digits == '0')) { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. 
+ // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for haswell */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for haswell: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // 
SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} 
+template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for haswell */ +/* end file simdjson/generic/amalgamated.h for haswell */ +/* including simdjson/haswell/end.h: #include "simdjson/haswell/end.h" */ +/* begin file simdjson/haswell/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL +SIMDJSON_UNTARGET_REGION +#endif + +/* undefining SIMDJSON_IMPLEMENTATION from "haswell" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/haswell/end.h */ + +#endif // SIMDJSON_HASWELL_H +/* end file simdjson/haswell.h */ +/* including simdjson/haswell/implementation.h: #include */ +/* begin file simdjson/haswell/implementation.h */ +#ifndef SIMDJSON_HASWELL_IMPLEMENTATION_H +#define SIMDJSON_HASWELL_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL +namespace 
simdjson { +namespace haswell { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation( + "haswell", + "Intel/AMD AVX2", + internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_IMPLEMENTATION_H +/* end file simdjson/haswell/implementation.h */ + +/* including simdjson/haswell/begin.h: #include */ +/* begin file simdjson/haswell/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "haswell" */ +#define SIMDJSON_IMPLEMENTATION haswell + +/* including simdjson/haswell/base.h: #include "simdjson/haswell/base.h" */ +/* begin file simdjson/haswell/base.h */ +#ifndef SIMDJSON_HASWELL_BASE_H +#define SIMDJSON_HASWELL_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL +namespace simdjson { +/** + * Implementation for Haswell (Intel AVX2). 
+ */ +namespace haswell { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BASE_H +/* end file simdjson/haswell/base.h */ +/* including simdjson/haswell/intrinsics.h: #include "simdjson/haswell/intrinsics.h" */ +/* begin file simdjson/haswell/intrinsics.h */ +#ifndef SIMDJSON_HASWELL_INTRINSICS_H +#define SIMDJSON_HASWELL_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdjson, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. 
+ */ +#include // for _blsr_u64 +#include // for __lzcnt64 +#include // for most things (AVX2, AVX512, _popcnt64) +#include +#include +#include +#include +#include // for _mm_clmulepi64_si128 +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. +#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO + +static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for haswell kernel."); + +#endif // SIMDJSON_HASWELL_INTRINSICS_H +/* end file simdjson/haswell/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL +SIMDJSON_TARGET_REGION("avx2,bmi,pclmul,lzcnt,popcnt") +#endif + +/* including simdjson/haswell/bitmanipulation.h: #include "simdjson/haswell/bitmanipulation.h" */ +/* begin file simdjson/haswell/bitmanipulation.h */ +#ifndef SIMDJSON_HASWELL_BITMANIPULATION_H +#define SIMDJSON_HASWELL_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmask.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. 
+// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? + //////// + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return _blsr_u64(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return int(_lzcnt_u64(input_num)); +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BITMANIPULATION_H +/* end file simdjson/haswell/bitmanipulation.h */ +/* including simdjson/haswell/bitmask.h: #include "simdjson/haswell/bitmask.h" */ +/* begin file simdjson/haswell/bitmask.h */ +#ifndef SIMDJSON_HASWELL_BITMASK_H +#define SIMDJSON_HASWELL_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): 
#include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BITMASK_H +/* end file simdjson/haswell/bitmask.h */ +/* including simdjson/haswell/numberparsing_defs.h: #include "simdjson/haswell/numberparsing_defs.h" */ +/* begin file simdjson/haswell/numberparsing_defs.h */ +#ifndef SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H +#define SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace numberparsing { + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. 
+ const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace haswell +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H +/* end file simdjson/haswell/numberparsing_defs.h */ +/* including simdjson/haswell/simd.h: #include "simdjson/haswell/simd.h" */ +/* begin file simdjson/haswell/simd.h */ +#ifndef SIMDJSON_HASWELL_SIMD_H +#define SIMDJSON_HASWELL_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ 
+/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m256i value; + + // Zero constructor + simdjson_inline base() : value{__m256i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m256i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m256i&() const { return this->value; } + simdjson_inline operator __m256i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm256_or_si256(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. 
+ template + struct simd8; + + template> + struct base8: base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m256i _value) : base>(_value) {} + + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm256_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm256_alignr_epi8(*this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m256i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm256_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm256_testz_si256(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm256_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm256_setzero_si256(); } + static simdjson_inline simd8 load(const T values[32]) { + return _mm256_loadu_si256(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline 
base8_numeric(const __m256i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[32]) const { return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm256_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm256_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm256_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint32_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in four steps, first 8 bytes and then second 8 bytes... 
+ uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits + uint8_t mask3 = uint8_t(mask >> 16); // ... + uint8_t mask4 = uint8_t(mask >> 24); // ... + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m256i shufmask = _mm256_set_epi64x(thintable_epi8[mask4], thintable_epi8[mask3], + thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask and so forth + shufmask = + _mm256_add_epi8(shufmask, _mm256_set_epi32(0x18181818, 0x18181818, + 0x10101010, 0x10101010, 0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m256i pruned = _mm256_shuffle_epi8(*this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + int pop3 = BitsSetTable256mul2[mask3]; + + // then load the corresponding mask + // could be done with _mm256_loadu2_m128i but many standard libraries omit this intrinsic. + __m256i v256 = _mm256_castsi128_si256( + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8))); + __m256i compactmask = _mm256_insertf128_si256(v256, + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop3 * 8)), 1); + __m256i almostthere = _mm256_shuffle_epi8(pruned, compactmask); + // We just need to write out the result. + // This is the tricky bit that is hard to do + // if we want to return a SIMD register, since there + // is no single-instruction approach to recombine + // the two 128-bit lanes with an offset. 
+ __m128i v128; + v128 = _mm256_castsi256_si128(almostthere); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output), v128); + v128 = _mm256_extractf128_si256(almostthere, 1); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output + 16 - count_ones(mask & 0xFFFF)), v128); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 + ) : simd8(_mm256_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, 
int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm256_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm256_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 + ) : simd8(_mm256_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t 
v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm256_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm256_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epu8(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); } + simdjson_inline 
bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm256_testz_si256(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 2, "Haswell kernel should use two registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint32_t mask1 = uint32_t(mask); + uint32_t mask2 = uint32_t(mask >> 32); + this->chunks[0].compress(mask1, output); + this->chunks[1].compress(mask2, output + 32 - count_ones(mask1)); + return 64 - count_ones(mask); + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r_hi = this->chunks[1].to_bitmask(); + return r_lo | (r_hi << 32); + } + + simdjson_inline simd8 
reduce_or() const { + return this->chunks[0] | this->chunks[1]; + } + + simdjson_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] | mask, + this->chunks[1] | mask + ); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_SIMD_H +/* end file simdjson/haswell/simd.h */ +/* including simdjson/haswell/stringparsing_defs.h: #include "simdjson/haswell/stringparsing_defs.h" */ +/* begin file simdjson/haswell/stringparsing_defs.h */ +#ifndef SIMDJSON_HASWELL_STRINGPARSING_DEFS_H +#define SIMDJSON_HASWELL_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 15 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + // store to dest unconditionally - we can overwrite the bits we don't like later + v.store(dst); + return { + static_cast((v == '\\').to_bitmask()), // bs_bits + static_cast((v == '"').to_bitmask()), // quote_bits + }; +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_STRINGPARSING_DEFS_H +/* end file simdjson/haswell/stringparsing_defs.h */ +/* end file simdjson/haswell/begin.h */ +/* including generic/amalgamated.h for haswell: #include */ +/* begin file generic/amalgamated.h for haswell */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_SRC_GENERIC_DEPENDENCIES_H) +#error generic/dependencies.h must be included before generic/amalgamated.h! 
+#endif + +/* including generic/base.h for haswell: #include */ +/* begin file generic/base.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { + +struct json_character_block; + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_BASE_H +/* end file generic/base.h for haswell */ +/* including generic/dom_parser_implementation.h for haswell: #include */ +/* begin file generic/dom_parser_implementation.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// Interface a dom parser implementation must fulfill +namespace simdjson { +namespace haswell { +namespace { + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3); +simdjson_inline bool is_ascii(const simd8x64& input); + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file generic/dom_parser_implementation.h for haswell */ +/* including generic/json_character_block.h for haswell: #include */ +/* begin file generic/json_character_block.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H + +/* amalgamation skipped (editor-only): 
#ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { + +struct json_character_block { + static simdjson_inline json_character_block classify(const simd::simd8x64& in); + + simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; } + simdjson_inline uint64_t op() const noexcept { return _op; } + simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } + + uint64_t _whitespace; + uint64_t _op; +}; + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H +/* end file generic/json_character_block.h for haswell */ +/* end file generic/amalgamated.h for haswell */ +/* including generic/stage1/amalgamated.h for haswell: #include */ +/* begin file generic/stage1/amalgamated.h for haswell */ +// Stuff other things depend on +/* including generic/stage1/base.h for haswell: #include */ +/* begin file generic/stage1/base.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace stage1 { + +class bit_indexer; +template +struct buf_block_reader; +struct json_block; +class json_minifier; +class json_scanner; +struct json_string_block; +class json_string_scanner; +class json_structural_indexer; + +} // namespace stage1 + +namespace utf8_validation { +struct utf8_checker; +} // namespace utf8_validation + +using 
utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BASE_H +/* end file generic/stage1/base.h for haswell */ +/* including generic/stage1/buf_block_reader.h for haswell: #include */ +/* begin file generic/stage1/buf_block_reader.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace haswell { +namespace { +namespace stage1 { + +// Walks through a buffer in block-sized increments, loading the last part with spaces +template +struct buf_block_reader { +public: + simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this + * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there + * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. 
+ */ + simdjson_inline size_t get_remainder(uint8_t *dst) const; + simdjson_inline void advance(); +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text_64(const uint8_t *text) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i); i++) { + buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text(const simd8x64& in) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] < ' ') { buf[i] = '_'; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_input_text(const simd8x64& in, uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] <= ' ') { buf[i] = '_'; } + if (!(mask & (size_t(1) << i))) { buf[i] = ' '; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_mask(uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i<64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 
0 : len - STEP_SIZE}, idx{0} {} + +template +simdjson_inline size_t buf_block_reader::block_index() { return idx; } + +template +simdjson_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} + +template +simdjson_inline const uint8_t *buf_block_reader::full_block() const { + return &buf[idx]; +} + +template +simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { + if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. + std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} + +template +simdjson_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H +/* end file generic/stage1/buf_block_reader.h for haswell */ +/* including generic/stage1/json_escape_scanner.h for haswell: #include */ +/* begin file generic/stage1/json_escape_scanner.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace stage1 { + +/** + * Scans for escape characters in JSON, taking care with multiple backslashes (\\n vs. \n). + */ +struct json_escape_scanner { + /** The actual escape characters (the backslashes themselves). */ + uint64_t next_is_escaped = 0ULL; + + struct escaped_and_escape { + /** + * Mask of escaped characters. 
+ * + * ``` + * \n \\n \\\n \\\\n \ + * 0100100010100101000 + * n \ \ n \ \ + * ``` + */ + uint64_t escaped; + /** + * Mask of escape characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 1001000101001010001 + * \ \ \ \ \ \ \ + * ``` + */ + uint64_t escape; + }; + + /** + * Get a mask of both escape and escaped characters (the characters following a backslash). + * + * @param potential_escape A mask of the character that can escape others (but could be + * escaped itself). e.g. block.eq('\\') + */ + simdjson_really_inline escaped_and_escape next(uint64_t backslash) noexcept { + +#if !SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT + if (!backslash) { return {next_escaped_without_backslashes(), 0}; } +#endif + + // | | Mask (shows characters instead of 1's) | Depth | Instructions | + // |--------------------------------|----------------------------------------|-------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | | | + // | | ` even odd even odd odd` | | | + // | potential_escape | ` \ \\\ \\\ \\\\ \\\\ \\\` | 1 | 1 (backslash & ~first_is_escaped) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 5 | 5 (next_escape_and_terminal_code()) + // | escaped | `\ \ n \ n \ \ \ \ \ ` X | 6 | 7 (escape_and_terminal_code ^ (potential_escape | first_is_escaped)) + // | escape | ` \ \ \ \ \ \ \ \ \ \` | 6 | 8 (escape_and_terminal_code & backslash) + // | first_is_escaped | `\ ` | 7 (*) | 9 (escape >> 63) () + // (*) this is not needed until the next iteration + uint64_t escape_and_terminal_code = next_escape_and_terminal_code(backslash & ~this->next_is_escaped); + uint64_t escaped = escape_and_terminal_code ^ (backslash | this->next_is_escaped); + uint64_t escape = escape_and_terminal_code & backslash; + this->next_is_escaped = escape >> 63; + return {escaped, escape}; + } + +private: + static constexpr const uint64_t ODD_BITS = 0xAAAAAAAAAAAAAAAAULL; + + simdjson_really_inline uint64_t next_escaped_without_backslashes() noexcept { + uint64_t 
escaped = this->next_is_escaped; + this->next_is_escaped = 0; + return escaped; + } + + /** + * Returns a mask of the next escape characters (masking out escaped backslashes), along with + * any non-backslash escape codes. + * + * \n \\n \\\n \\\\n returns: + * \n \ \ \n \ \ + * 11 100 1011 10100 + * + * You are expected to mask out the first bit yourself if the previous block had a trailing + * escape. + * + * & the result with potential_escape to get just the escape characters. + * ^ the result with (potential_escape | first_is_escaped) to get escaped characters. + */ + static simdjson_really_inline uint64_t next_escape_and_terminal_code(uint64_t potential_escape) noexcept { + // If we were to just shift and mask out any odd bits, we'd actually get a *half* right answer: + // any even-aligned backslash runs would be correct! Odd-aligned backslash runs would be + // inverted (\\\ would be 010 instead of 101). + // + // ``` + // string: | ____\\\\_\\\\_____ | + // maybe_escaped | ODD | \ \ \ \ | + // even-aligned ^^^ ^^^^ odd-aligned + // ``` + // + // Taking that into account, our basic strategy is: + // + // 1. Use subtraction to produce a mask with 1's for even-aligned runs and 0's for + // odd-aligned runs. + // 2. XOR all odd bits, which masks out the odd bits in even-aligned runs, and brings IN the + // odd bits in odd-aligned runs. + // 3. & with backslash to clean up any stray bits. 
+ // runs are set to 0, and then XORing with "odd": + // + // | | Mask (shows characters instead of 1's) | Instructions | + // |--------------------------------|----------------------------------------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | + // | | ` even odd even odd odd` | + // | maybe_escaped | ` n \\n \\n \\\_ \\\_ \\` X | 1 (potential_escape << 1) + // | maybe_escaped_and_odd | ` \n_ \\n _ \\\n_ _ \\\__ _\\\_ \\\` | 1 (maybe_escaped | odd) + // | even_series_codes_and_odd | ` n_\\\ _ n_ _\\\\ _ _ ` | 1 (maybe_escaped_and_odd - potential_escape) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 1 (^ odd) + // + + // Escaped characters are characters following an escape. + uint64_t maybe_escaped = potential_escape << 1; + + // To distinguish odd from even escape sequences, therefore, we turn on any *starting* + // escapes that are on an odd byte. (We actually bring in all odd bits, for speed.) + // - Odd runs of backslashes are 0000, and the code at the end ("n" in \n or \\n) is 1. + // - Odd runs of backslashes are 1111, and the code at the end ("n" in \n or \\n) is 0. + // - All other odd bytes are 1, and even bytes are 0. + uint64_t maybe_escaped_and_odd_bits = maybe_escaped | ODD_BITS; + uint64_t even_series_codes_and_odd_bits = maybe_escaped_and_odd_bits - potential_escape; + + // Now we flip all odd bytes back with xor. 
This: + // - Makes odd runs of backslashes go from 0000 to 1010 + // - Makes even runs of backslashes go from 1111 to 1010 + // - Sets actually-escaped codes to 1 (the n in \n and \\n: \n = 11, \\n = 100) + // - Resets all other bytes to 0 + return even_series_codes_and_odd_bits ^ ODD_BITS; + } +}; + +} // namespace stage1 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_escape_scanner.h for haswell */ +/* including generic/stage1/json_string_scanner.h for haswell: #include */ +/* begin file generic/stage1/json_string_scanner.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace stage1 { + +struct json_string_block { + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_really_inline json_string_block(uint64_t escaped, uint64_t quote, uint64_t in_string) : + _escaped(escaped), _quote(quote), _in_string(in_string) {} + + // Escaped characters (characters following an escape() character) + simdjson_really_inline uint64_t escaped() const { return _escaped; } + // Real (non-backslashed) quotes + simdjson_really_inline uint64_t quote() const { return _quote; } + // Only characters inside the string (not including the quotes) + simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t 
non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + // Tail of string (everything except the start quote) + simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } + + // escaped characters (backslashed--does not include the hex characters after \u) + uint64_t _escaped; + // real quotes (non-escaped ones) + uint64_t _quote; + // string characters (includes start quote but not end quote) + uint64_t _in_string; +}; + +// Scans blocks for string characters, storing the state necessary to do so +class json_string_scanner { +public: + simdjson_really_inline json_string_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_really_inline error_code finish(); + +private: + // Scans for escape characters + json_escape_scanner escape_scanner{}; + // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). + uint64_t prev_in_string = 0ULL; +}; + +// +// Return a mask of all string characters plus end quotes. +// +// prev_escaped is overflow saying whether the next character is escaped. +// prev_in_string is overflow saying whether we're still in a string. +// +// Backslash sequences outside of quotes will be detected in stage 2. +// +simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + const uint64_t backslash = in.eq('\\'); + const uint64_t escaped = escape_scanner.next(backslash).escaped; + const uint64_t quote = in.eq('"') & ~escaped; + + // + // prefix_xor flips on bits inside the string (and flips off the end quote). + // + // Then we xor with prev_in_string: if we were in a string already, its effect is flipped + // (characters inside strings are outside, and characters outside strings are inside). 
+ // + const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; + + // + // Check if we're still in a string at the end of the box so the next block will know + // + prev_in_string = uint64_t(static_cast(in_string) >> 63); + + // Use ^ to turn the beginning quote off, and the end quote on. + + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_string_block(escaped, quote, in_string); +} + +simdjson_really_inline error_code json_string_scanner::finish() { + if (prev_in_string) { + return UNCLOSED_STRING; + } + return SUCCESS; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_string_scanner.h for haswell */ +/* including generic/stage1/utf8_lookup4_algorithm.h for haswell: #include */ +/* begin file generic/stage1/utf8_lookup4_algorithm.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace utf8_validation { + +using namespace simd; + + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { +// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) +// Bit 1 = Too Long (ASCII followed by continuation) +// Bit 2 = Overlong 3-byte +// Bit 4 = Surrogate +// Bit 5 = Overlong 2-byte +// Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 
10______ + constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1<<6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 + ); + constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . 
+ const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, + CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 + ); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT + ); + return (byte_1_high & byte_1_low & byte_2_high); + } + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, + const simd8 prev_input, const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = must_be_2_3_continuation(prev2, prev3); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; + } + + // + // Return nonzero if there are incomplete multibyte characters at the end of the block: + // e.g. if there is a 4-byte character, but it's 3 bytes from the end. 
+ // + simdjson_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): + // ... 1111____ 111_____ 11______ +#if SIMDJSON_IMPLEMENTATION_ICELAKE + static const uint8_t max_array[64] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#else + static const uint8_t max_array[32] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#endif + const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); + return input.gt_bits(max_value); + } + + struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. + // + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes + // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + // The only problem that can happen at EOF is that a multibyte character is too short + // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // too large in the first of two bytes. 
+ simdjson_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; + } + + simdjson_inline void check_next_input(const simd8x64& input) { + if(simdjson_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 1) + ||(simd8x64::NUM_CHUNKS == 2) + || (simd8x64::NUM_CHUNKS == 4), + "We support one, two or four chunks per 64-byte block."); + SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; + } + } + // do not forget to call check_eof! + simdjson_inline error_code errors() { + return this->error.any_bits_set_anywhere() ? 
error_code::UTF8_ERROR : error_code::SUCCESS; + } + + }; // struct utf8_checker +} // namespace utf8_validation + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H +/* end file generic/stage1/utf8_lookup4_algorithm.h for haswell */ +/* including generic/stage1/json_scanner.h for haswell: #include */ +/* begin file generic/stage1/json_scanner.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace stage1 { + +/** + * A block of scanned json, with information on operators and scalars. + * + * We seek to identify pseudo-structural characters. Anything that is inside + * a string must be omitted (hence & ~_string.string_tail()). + * Otherwise, pseudo-structural characters come in two forms. + * 1. We have the structural characters ([,],{,},:, comma). The + * term 'structural character' is from the JSON RFC. + * 2. We have the 'scalar pseudo-structural characters'. + * Scalars are quotes, and any character except structural characters and white space. + * + * To identify the scalar pseudo-structural characters, we must look at what comes + * before them: it must be a space, a quote or a structural characters. + * Starting with simdjson v0.3, we identify them by + * negation: we identify everything that is followed by a non-quote scalar, + * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. 
+ */ +struct json_block { +public: + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + + /** + * The start of structurals. + * In simdjson prior to v0.3, these were called the pseudo-structural characters. + **/ + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + /** All JSON whitespace (i.e. not in a string) */ + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + + // Helpers + + /** Whether the given characters are inside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + /** Whether the given characters are outside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + + // string and escape characters + json_string_block _string; + // whitespace, structural characters ('operators'), scalars + json_character_block _characters; + // whether the previous character was a scalar + uint64_t _follows_potential_nonquote_scalar; +private: + // Potential structurals (i.e. disregarding strings) + + /** + * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". + * They may reside inside a string. 
+ **/ + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + /** + * The start of non-operator runs, like 123, true and "abc". + * It main reside inside a string. + **/ + simdjson_inline uint64_t potential_scalar_start() const noexcept { + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space + // then we know that it is irrelevant structurally. + return _characters.scalar() & ~follows_potential_scalar(); + } + /** + * Whether the given character is immediately after a non-operator like 123, true. + * The characters following a quote are not included. + */ + simdjson_inline uint64_t follows_potential_scalar() const noexcept { + // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character + // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a + // white space. + // It is understood that within quoted region, anything at all could be marked (irrelevant). + return _follows_potential_nonquote_scalar; + } +}; + +/** + * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. + * + * The scanner starts by calculating two distinct things: + * - string characters (taking \" into account) + * - structural characters or 'operators' ([]{},:, comma) + * and scalars (runs of non-operators like 123, true and "abc") + * + * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: + * in particular, the operator/scalar bit will find plenty of things that are actually part of + * strings. When we're done, json_block will fuse the two together by masking out tokens that are + * part of a string. 
+ */ +class json_scanner { +public: + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_inline error_code finish(); + +private: + // Whether the last character of the previous iteration is part of a scalar token + // (anything except whitespace or a structural character/'operator'). + uint64_t prev_scalar = 0ULL; + json_string_scanner string_scanner{}; +}; + + +// +// Check if the current character immediately follows a matching character. +// +// For example, this checks for quotes with backslashes in front of them: +// +// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); +// +simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { + const uint64_t result = match << 1 | overflow; + overflow = match >> 63; + return result; +} + +simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { + json_string_block strings = string_scanner.next(in); + // identifies the white-space and the structural characters + json_character_block characters = json_character_block::classify(in); + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). + // + // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) + // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential + // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we + // may need to add an extra check when parsing strings. + // + // Performance: there are many ways to skin this cat. 
+ const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); + uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_block( + strings,// strings is a function-local object so either it moves or the copy is elided. + characters, + follows_nonquote_scalar + ); +} + +simdjson_inline error_code json_scanner::finish() { + return string_scanner.finish(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H +/* end file generic/stage1/json_scanner.h for haswell */ + +// All other declarations +/* including generic/stage1/find_next_document_index.h for haswell: #include */ +/* begin file generic/stage1/find_next_document_index.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace stage1 { + +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. 
We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! + if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; it starts at structural index i, so only the i structurals before it are reported + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document.
+ switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } + return 0; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H +/* end file generic/stage1/find_next_document_index.h for haswell */ +/* including generic/stage1/json_minifier.h for haswell: #include */ +/* begin file generic/stage1/json_minifier.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgamation) + +namespace simdjson { +namespace haswell { +namespace { +namespace stage1 { + +class json_minifier { +public: + template + static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; + +private: + simdjson_inline json_minifier(uint8_t *_dst) + : dst{_dst} + {} + template + simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + json_scanner
scanner{}; + uint8_t *dst; +}; + +simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + uint64_t mask = block.whitespace(); + dst += in.compress(mask, dst); +} + +simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { + error_code error = scanner.finish(); + if (error) { dst_len = 0; return error; } + dst_len = dst - dst_start; + return SUCCESS; +} + +template<> +simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + simd::simd8x64 in_2(block_buf+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1); + this->next(in_2, block_2); + reader.advance(); +} + +template<> +simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + json_block block_1 = scanner.next(in_1); + this->next(block_buf, block_1); + reader.advance(); +} + +template +error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { + buf_block_reader reader(buf, len); + json_minifier minifier(dst); + + // Index the first n-1 blocks + while (reader.has_full_block()) { + minifier.step(reader.full_block(), reader); + } + + // Index the last (remainder) block, padded with spaces + uint8_t block[STEP_SIZE]; + size_t remaining_bytes = reader.get_remainder(block); + if (remaining_bytes > 0) { + // We do not want to write directly to the output stream. Rather, we write + // to a local buffer (for safety). + uint8_t out_block[STEP_SIZE]; + uint8_t * const guarded_dst{minifier.dst}; + minifier.dst = out_block; + minifier.step(block, reader); + size_t to_write = minifier.dst - out_block; + // In some cases, we could be enticed to consider the padded spaces + // as part of the string. This is fine as long as we do not write more + // than we consumed. 
+ if(to_write > remaining_bytes) { to_write = remaining_bytes; } + memcpy(guarded_dst, out_block, to_write); + minifier.dst = guarded_dst + to_write; + } + return minifier.finish(dst, dst_len); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H +/* end file generic/stage1/json_minifier.h for haswell */ +/* including generic/stage1/json_structural_indexer.h for haswell: #include */ +/* begin file generic/stage1/json_structural_indexer.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgamation) + +namespace simdjson { +namespace haswell { +namespace { +namespace stage1 { + +class bit_indexer { +public: + uint32_t *tail; + + simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + +#if SIMDJSON_PREFER_REVERSE_BITS + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction.
+ * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. However, + * it tends to minimize the total number of instructions which is + * beneficial. + */ + simdjson_inline void write_index(uint32_t idx, uint64_t& rev_bits, int i) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } +#else + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. + */ + + simdjson_inline void write_index(uint32_t idx, uint64_t& bits, int i) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } +#endif // SIMDJSON_PREFER_REVERSE_BITS + + template + simdjson_inline int write_indexes(uint32_t idx, uint64_t& bits) { + write_index(idx, bits, START); + SIMDJSON_IF_CONSTEXPR (N > 1) { + write_indexes<(N-1>0?START+1:START), (N-1>=0?N-1:1)>(idx, bits); + } + return START+N; + } + + template + simdjson_inline int write_indexes_stepped(uint32_t idx, uint64_t& bits, int cnt) { + write_indexes(idx, bits); + SIMDJSON_IF_CONSTEXPR ((START+STEP) < END) { + if (simdjson_unlikely((START+STEP) < cnt)) { + write_indexes_stepped<(START+STEP(idx, bits, cnt); + } + } + return ((END-START) % STEP) == 0 ? 
END : (END-START) - ((END-START) % STEP) + STEP; + } + + // flatten out values in 'bits' assuming that they are to have values of idx + // plus their position in the bitvector, and store these indexes at + // tail[0], tail[1], ... advancing tail by the number of set bits when done + // will potentially store extra values beyond end of valid bits, so the buffer behind tail + // needs to be large enough to handle this + // + // If the kernel sets SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER, then it + // will provide its own version of the code. +#ifdef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + simdjson_inline void write(uint32_t idx, uint64_t bits); +#else + simdjson_inline void write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; + + int cnt = static_cast(count_ones(bits)); + +#if SIMDJSON_PREFER_REVERSE_BITS + bits = reverse_bits(bits); +#endif +#ifdef SIMDJSON_STRUCTURAL_INDEXER_STEP + static constexpr const int STEP = SIMDJSON_STRUCTURAL_INDEXER_STEP; +#else + static constexpr const int STEP = 4; +#endif + static constexpr const int STEP_UNTIL = 24; + + write_indexes_stepped<0, STEP_UNTIL, STEP>(idx, bits, cnt); + SIMDJSON_IF_CONSTEXPR (STEP_UNTIL < 64) { + if (simdjson_unlikely(STEP_UNTIL < cnt)) { + for (int i=STEP_UNTIL; itail += cnt; + } +#endif // SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +}; + +class json_structural_indexer { +public: + /** + * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. + * + * @param partial Setting the partial parameter to true allows the find_structural_bits to + * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If + * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8.
+ */ + template + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; + +private: + simdjson_inline json_structural_indexer(uint32_t *structural_indexes); + template + simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + + json_scanner scanner{}; + utf8_checker checker{}; + bit_indexer indexer; + uint64_t prev_structurals = 0; + uint64_t unescaped_chars_error = 0; +}; + +simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} + +// Skip the last character if it is partial +simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { + if (simdjson_unlikely(len < 3)) { + switch (len) { + case 2: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + return len; + case 1: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + return len; + case 0: + return len; + } + } + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left + return len; +} + +// +// PERF NOTES: +// We pipe 2 inputs through these stages: +// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load +// 2 inputs' worth at once so that by the time step 2 is looking for the input, it's available. +// 2. Scan the JSON for critical data: strings, scalars and operators.
This is the critical path. +// The output of step 1 depends entirely on this information. These functions don't quite use +// up enough CPU: the second half of the functions is highly serial, only using 1 execution core +// at a time. The second input's scans has some dependency on the first ones finishing it, but +// they can make a lot of progress before they need that information. +// 3. Step 1 does not use enough capacity, so we run some extra stuff while we're waiting for that +// to finish: utf-8 checks and generating the output from the last iteration. +// +// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all +// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough +// workout. +// +template +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { + if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. + if(len == 0) { return UTF8_ERROR; } + } + buf_block_reader reader(buf, len); + json_structural_indexer indexer(parser.structural_indexes.get()); + + // Read all but the last block + while (reader.has_full_block()) { + indexer.step(reader.full_block(), reader); + } + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) 
+ uint8_t block[STEP_SIZE]; + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } + indexer.step(block, reader); + return indexer.finish(parser, reader.block_index(), len, partial); +} + +template<> +simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block); + simd::simd8x64 in_2(block+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1, reader.block_index()); + this->next(in_2, block_2, reader.block_index()+64); + reader.advance(); +} + +template<> +simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block); + json_block block_1 = scanner.next(in_1); + this->next(in_1, block_1, reader.block_index()); + reader.advance(); +} + +simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + uint64_t unescaped = in.lteq(0x1F); +#if SIMDJSON_UTF8VALIDATION + checker.check_next_input(in); +#endif + indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser + prev_structurals = block.structural_start(); + unescaped_chars_error |= block.non_quote_inside_string(unescaped); +} + +simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { + // Write out the final iteration's structurals + indexer.write(uint32_t(idx-64), prev_structurals); + error_code error = scanner.finish(); + // We deliberately break down the next expression so that it is + // human readable. + const bool should_we_exit = is_streaming(partial) ? 
+ ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING + : (error != SUCCESS); // if partial is false, we must have SUCCESS + const bool have_unclosed_string = (error == UNCLOSED_STRING); + if (simdjson_unlikely(should_we_exit)) { return error; } + + if (unescaped_chars_error) { + return UNESCAPED_CHARS; + } + parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); + /*** + * The On Demand API requires special padding. + * + * This is related to https://github.com/simdjson/simdjson/issues/906 + * Basically, we want to make sure that if the parsing continues beyond the last (valid) + * structural character, it quickly stops. + * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. + * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing + * continues, then it must be [,] or }. + * Suppose it is ] or }. We backtrack to the first character, what could it be that would + * not trigger an error? It could be ] or } but no, because you can't start a document that way. + * It can't be a comma, a colon or any simple value. So the only way we could continue is + * if the repeated character is [. But if so, the document must start with [. But if the document + * starts with [, it should end with ]. If we enforce that rule, then we would get + * ][[ which is invalid. 
+ * + * This is illustrated with the test array_iterate_unclosed_error() on the following input: + * R"({ "a": [,,)" + **/ + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final + parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); + parser.structural_indexes[parser.n_structural_indexes + 2] = 0; + parser.next_structural_index = 0; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + return EMPTY; + } + if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { + return UNEXPECTED_ERROR; + } + if (partial == stage1_mode::streaming_partial) { + // If we have an unclosed string, then the last structural + // will be the quote and we want to make sure to omit it. + if(have_unclosed_string) { + parser.n_structural_indexes--; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } + } + // We truncate the input to the end of the last complete document (or zero). + auto new_structural_indexes = find_next_document_index(parser); + if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } + + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). 
+ // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. + return EMPTY; + } + } + checker.check_eof(); + return checker.errors(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +// Clear CUSTOM_BIT_INDEXER so other implementations can set it if they need to. 
+#undef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H +/* end file generic/stage1/json_structural_indexer.h for haswell */ +/* including generic/stage1/utf8_validator.h for haswell: #include */ +/* begin file generic/stage1/utf8_validator.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace stage1 { + +/** + * Validates that the string is actual UTF-8. + */ +template +bool generic_validate_utf8(const uint8_t * input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return c.errors() == error_code::SUCCESS; +} + +bool generic_validate_utf8(const char * input, size_t length) { + return generic_validate_utf8(reinterpret_cast(input),length); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H +/* end file generic/stage1/utf8_validator.h for haswell */ +/* end file generic/stage1/amalgamated.h for haswell */ +/* including generic/stage2/amalgamated.h for haswell: #include */ +/* begin file generic/stage2/amalgamated.h for haswell */ +// Stuff other things depend on +/* including 
generic/stage2/base.h for haswell: #include */ +/* begin file generic/stage2/base.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace stage2 { + +class json_iterator; +class structural_iterator; +struct tape_builder; +struct tape_writer; + +} // namespace stage2 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_BASE_H +/* end file generic/stage2/base.h for haswell */ +/* including generic/stage2/tape_writer.h for haswell: #include */ +/* begin file generic/stage2/tape_writer.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace haswell { +namespace { +namespace stage2 { + +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; + + /** Write a signed 64-bit value to tape. */ + simdjson_inline void append_s64(int64_t value) noexcept; + + /** Write an unsigned 64-bit value to tape. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + + /** Write a double value to tape. */ + simdjson_inline void append_double(double value) noexcept; + + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). 
+ */ + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; + + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_inline void skip() noexcept; + + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_inline void skip_large_integer() noexcept; + + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_inline void skip_double() noexcept; + + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. + */ + template + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct tape_writer + +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} + +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} + +/** Write a double value to tape. 
*/ +simdjson_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} + +simdjson_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} + +simdjson_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} + +template +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} + +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} + +} // namespace stage2 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H +/* end file generic/stage2/tape_writer.h for haswell */ +/* including generic/stage2/logger.h for haswell: #include */ +/* begin file generic/stage2/logger.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + + +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! 
+namespace simdjson { +namespace haswell { +namespace { +namespace logger { + + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; + + static int log_depth; // Not threadsafe. Log only. + + // Helper to turn unprintable or newline characters into spaces + static simdjson_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } + } + + // Print the header and set up log_start + static simdjson_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } + } + + simdjson_unused static simdjson_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } + + // Logs a single line from the stage 2 DOM parser + template + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? 
nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i */ +/* begin file generic/stage2/json_iterator.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace stage2 { + +class json_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + uint32_t depth{0}; + + /** + * Walk the JSON document. + * + * The visitor receives callbacks when values are encountered. All callbacks pass the iterator as + * the first parameter; some callbacks have other parameters as well: + * + * - visit_document_start() - at the beginning. + * - visit_document_end() - at the end (if things were successful). + * + * - visit_array_start() - at the start `[` of a non-empty array. + * - visit_array_end() - at the end `]` of a non-empty array. + * - visit_empty_array() - when an empty array is encountered. + * + * - visit_object_end() - at the start `]` of a non-empty object. + * - visit_object_start() - at the end `]` of a non-empty object. + * - visit_empty_object() - when an empty object is encountered. 
+ * - visit_key(const uint8_t *key) - when a key in an object field is encountered. key is + * guaranteed to point at the first quote of the string (`"key"`). + * - visit_primitive(const uint8_t *value) - when a value is a string, number, boolean or null. + * - visit_root_primitive(iter, uint8_t *value) - when the top-level value is a string, number, boolean or null. + * + * - increment_count(iter) - each time a value is found in an array or object. + */ + template + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; + + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + + /** + * Look at the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; + + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. 
+ */ + simdjson_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_error(const char *error) const noexcept; + + template + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); + + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); + + // + // Read first value + // + { + auto value = advance(); + + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. 
See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } + } + goto document_end; + +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); + + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { 
log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } + +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; + +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); + +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + } + +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); + + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); + + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters 
at the end of the JSON!"); + return TAPE_ERROR; + } + + return SUCCESS; + +} // walk_document() + +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { +} + +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; +} +simdjson_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); +} + +simdjson_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { + return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} + +simdjson_inline void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} + +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} + +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} + +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, 
value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + // Use the fact that most scalars are going to be either strings or numbers. + if(*value == '"') { + return visitor.visit_string(*this, value); + } else if (((*value - '0') < 10) || (*value == '-')) { + return visitor.visit_number(*this, value); + } + // true, false, null are uncommon. + switch (*value) { + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + default: + log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } +} + +} // namespace stage2 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H +/* end file generic/stage2/json_iterator.h for haswell */ +/* including generic/stage2/stringparsing.h for haswell: #include */ +/* begin file generic/stage2/stringparsing.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation 
uses +// It is intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace haswell { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr, bool allow_replacement) { + // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) + constexpr uint32_t substitution_code_point = 0xfffd; + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this to inside the + 
// multilingual plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + + } + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. 
+ if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +// handle a unicode codepoint using the wobbly convention +// https://simonsapin.github.io/wtf-8/ +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // It is not ideal that this function is nearly identical to handle_unicode_codepoint. + // + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this to inside the + // multilingual plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + uint32_t low_bit = code_point_2 - 0xdc00; + if ((low_bit >> 10) == 0) { + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + } + } + + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. 
There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_ptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. 
*/ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } +} + +simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) { + // It is not ideal that this function is nearly identical to parse_string. + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint_wobbly(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. 
*/ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H +/* end file generic/stage2/stringparsing.h for haswell */ +/* including generic/stage2/structural_iterator.h for haswell: #include */ +/* begin file generic/stage2/structural_iterator.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace stage2 { + +class structural_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + + // Start a structural + simdjson_inline structural_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { + } + // Get the buffer position of the current structural character + simdjson_inline const uint8_t* current() { + return &buf[*(next_structural-1)]; + } + // Get the current structural character + simdjson_inline char current_char() { + return buf[*(next_structural-1)]; + } + // Get the next structural character without advancing + simdjson_inline char peek_next_char() { + return buf[*next_structural]; + } + simdjson_inline const uint8_t* peek() { + return &buf[*next_structural]; + } + simdjson_inline const uint8_t* advance() { + return &buf[*(next_structural++)]; + } + simdjson_inline char advance_char() { + 
return buf[*(next_structural++)]; + } + simdjson_inline size_t remaining_len() { + return dom_parser.len - *(next_structural-1); + } + + simdjson_inline bool at_end() { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; + } + simdjson_inline bool at_beginning() { + return next_structural == dom_parser.structural_indexes.get(); + } +}; + +} // namespace stage2 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H +/* end file generic/stage2/structural_iterator.h for haswell */ +/* including generic/stage2/tape_builder.h for haswell: #include */ +/* begin file generic/stage2/tape_builder.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace haswell { +namespace { +namespace stage2 { + +struct tape_builder { + template + simdjson_warn_unused static simdjson_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; + + /** Called when a non-empty document starts. */ + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. 
*/ + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; + + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. + */ + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; + + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. + * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. 
+ */ + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + /** Called each time a new field or element in an array or object is found. 
*/ + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; + + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; + + simdjson_inline tape_builder(dom::document &doc) noexcept; + + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; +}; // struct tape_builder + +template +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { + constexpr uint32_t start_tape_index = 0; + 
tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} + +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst, false); // We do not allow replacement when the escape characters are invalid. + if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; + } + on_end_string(dst); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { + // + // We need to make a copy to make sure that the string is space terminated. + // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. 
However, we have no control at this stage + // on how the padding was done. What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. + // + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code 
tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +// private: + +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} + +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. 
+} + +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? 
It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} + +} // namespace stage2 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H +/* end file generic/stage2/tape_builder.h for haswell */ +/* end file generic/stage2/amalgamated.h for haswell */ + +// +// Stage 1 +// + +namespace simdjson { +namespace haswell { + +simdjson_warn_unused error_code implementation::create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr& dst +) const noexcept { + dst.reset( new (std::nothrow) dom_parser_implementation() ); + if (!dst) { return MEMALLOC; } + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; + return SUCCESS; +} + +namespace { + +using namespace simd; + +// This identifies structural characters (comma, colon, braces, brackets), +// and ASCII white-space ('\r','\n','\t',' '). +simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { + // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why + // we can't use the generic lookup_16. + const auto whitespace_table = simd8::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); + + // The 6 operators (:,[]{}) have these values: + // + // , 2C + // : 3A + // [ 5B + // { 7B + // ] 5D + // } 7D + // + // If you use | 0x20 to turn [ and ] into { and }, the lower 4 bits of each character is unique. + // We exploit this, using a simd 4-bit lookup to tell us which character match against, and then + // match it (against | 0x20). + // + // To prevent recognizing other characters, everything else gets compared with 0, which cannot + // match due to the | 0x20. + // + // NOTE: Due to the | 0x20, this ALSO treats and (control characters 0C and 1A) like , + // and :. 
This gets caught in stage 2, which checks the actual character to ensure the right + // operators are in the right places. + const auto op_table = simd8::repeat_16( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, ':', '{', // : = 3A, [ = 5B, { = 7B + ',', '}', 0, 0 // , = 2C, ] = 5D, } = 7D + ); + + // We compute whitespace and op separately. If later code only uses one or the + // other, given the fact that all functions are aggressively inlined, we can + // hope that useless computations will be omitted. This is namely case when + // minifying (we only need whitespace). + + const uint64_t whitespace = in.eq({ + _mm256_shuffle_epi8(whitespace_table, in.chunks[0]), + _mm256_shuffle_epi8(whitespace_table, in.chunks[1]) + }); + // Turn [ and ] into { and } + const simd8x64 curlified{ + in.chunks[0] | 0x20, + in.chunks[1] | 0x20 + }; + const uint64_t op = curlified.eq({ + _mm256_shuffle_epi8(op_table, in.chunks[0]), + _mm256_shuffle_epi8(op_table, in.chunks[1]) + }); + + return { whitespace, op }; +} + +simdjson_inline bool is_ascii(const simd8x64& input) { + return input.reduce_or().is_ascii(); +} + +simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1.saturating_sub(0xc0u-1); // Only 11______ will be > 0 + simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. 
+ return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); +} + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0xe0u-0x80); // Only 111_____ will be >= 0x80 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-0x80); // Only 1111____ will be >= 0x80 + return is_third_byte | is_fourth_byte; +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +// +// Stage 2 +// + +// +// Implementation-specific overrides +// +namespace simdjson { +namespace haswell { + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + return haswell::stage1::json_minifier::minify<128>(buf, len, dst, dst_len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { + this->buf = _buf; + this->len = _len; + return haswell::stage1::json_structural_indexer::index<128>(_buf, _len, *this, streaming); +} + +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return haswell::stage1::generic_validate_utf8(buf,len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst, bool replacement_char) const noexcept { + return haswell::stringparsing::parse_string(src, dst, replacement_char); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept { + return haswell::stringparsing::parse_wobbly_string(src, dst); +} 
+ +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { return error; } + return stage2(_doc); +} + +} // namespace haswell +} // namespace simdjson + +/* including simdjson/haswell/end.h: #include */ +/* begin file simdjson/haswell/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL +SIMDJSON_UNTARGET_REGION +#endif + +/* undefining SIMDJSON_IMPLEMENTATION from "haswell" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/haswell/end.h */ + +#endif // SIMDJSON_SRC_HASWELL_CPP +/* end file haswell.cpp */ +#endif +#if SIMDJSON_IMPLEMENTATION_ICELAKE +/* including icelake.cpp: #include */ +/* begin file icelake.cpp */ +#ifndef SIMDJSON_SRC_ICELAKE_CPP +#define SIMDJSON_SRC_ICELAKE_CPP + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* including simdjson/icelake.h: #include */ +/* begin file simdjson/icelake.h */ +#ifndef SIMDJSON_ICELAKE_H +#define SIMDJSON_ICELAKE_H + +/* including simdjson/icelake/begin.h: #include "simdjson/icelake/begin.h" */ +/* begin file simdjson/icelake/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "icelake" */ +#define SIMDJSON_IMPLEMENTATION icelake +/* including simdjson/icelake/base.h: #include "simdjson/icelake/base.h" */ +/* begin file simdjson/icelake/base.h */ +#ifndef SIMDJSON_ICELAKE_BASE_H +#define SIMDJSON_ICELAKE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* 
amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE +namespace simdjson { +/** + * Implementation for Icelake (Intel AVX512). + */ +namespace icelake { + +class implementation; + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BASE_H +/* end file simdjson/icelake/base.h */ +/* including simdjson/icelake/intrinsics.h: #include "simdjson/icelake/intrinsics.h" */ +/* begin file simdjson/icelake/intrinsics.h */ +#ifndef SIMDJSON_ICELAKE_INTRINSICS_H +#define SIMDJSON_ICELAKE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdjson, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. 
+ */ +#include // for _blsr_u64 +#include // for __lzcnt64 +#include // for most things (AVX2, AVX512, _popcnt64) +#include +#include +#include +#include +#include // for _mm_clmulepi64_si128 +// Important: we need the AVX-512 headers: +#include +#include +#include +#include +#include +#include +#include +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. +#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO + +static_assert(sizeof(__m512i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for icelake"); + +#endif // SIMDJSON_ICELAKE_INTRINSICS_H +/* end file simdjson/icelake/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +SIMDJSON_TARGET_REGION("avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2,avx512vl,avx2,bmi,pclmul,lzcnt,popcnt") +#endif + +/* including simdjson/icelake/bitmanipulation.h: #include "simdjson/icelake/bitmanipulation.h" */ +/* begin file simdjson/icelake/bitmanipulation.h */ +#ifndef SIMDJSON_ICELAKE_BITMANIPULATION_H +#define SIMDJSON_ICELAKE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. 
+// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? + //////// + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return _blsr_u64(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return int(_lzcnt_u64(input_num)); +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BITMANIPULATION_H +/* end file simdjson/icelake/bitmanipulation.h */ +/* including simdjson/icelake/bitmask.h: #include "simdjson/icelake/bitmask.h" */ +/* begin file simdjson/icelake/bitmask.h */ +#ifndef SIMDJSON_ICELAKE_BITMASK_H +#define SIMDJSON_ICELAKE_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include 
"simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BITMASK_H +/* end file simdjson/icelake/bitmask.h */ +/* including simdjson/icelake/simd.h: #include "simdjson/icelake/simd.h" */ +/* begin file simdjson/icelake/simd.h */ +#ifndef SIMDJSON_ICELAKE_SIMD_H +#define SIMDJSON_ICELAKE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if defined(__GNUC__) && !defined(__clang__) +#if __GNUC__ == 8 +#define SIMDJSON_GCC8 1 +#endif // __GNUC__ == 8 +#endif // defined(__GNUC__) && !defined(__clang__) + +#if SIMDJSON_GCC8 +/** + * GCC 8 fails to provide _mm512_set_epi8. We roll our own. 
+ */ +inline __m512i _mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) { + return _mm512_set_epi64(uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) + (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) + (uint64_t(a1) << 48) + (uint64_t(a0) << 56), + uint64_t(a15) + (uint64_t(a14) << 8) + (uint64_t(a13) << 16) + (uint64_t(a12) << 24) + (uint64_t(a11) << 32) + (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56), + uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) + (uint64_t(a20) << 24) + (uint64_t(a19) << 32) + (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56), + uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) + (uint64_t(a28) << 24) + (uint64_t(a27) << 32) + (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56), + uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) + (uint64_t(a36) << 24) + (uint64_t(a35) << 32) + (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56), + uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) + (uint64_t(a44) << 24) + (uint64_t(a43) << 32) + (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56), + uint64_t(a55) + 
(uint64_t(a54) << 8) + (uint64_t(a53) << 16) + (uint64_t(a52) << 24) + (uint64_t(a51) << 32) + (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56), + uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) + (uint64_t(a60) << 24) + (uint64_t(a59) << 32) + (uint64_t(a58) << 40) + (uint64_t(a57) << 48) + (uint64_t(a56) << 56)); +} +#endif // SIMDJSON_GCC8 + + + +namespace simdjson { +namespace icelake { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m512i value; + + // Zero constructor + simdjson_inline base() : value{__m512i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m512i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m512i&() const { return this->value; } + simdjson_inline operator __m512i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm512_or_si512(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm512_and_si512(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm512_xor_si512(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm512_andnot_si512(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. 
+ template + struct simd8; + + template> + struct base8: base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m512i _value) : base>(_value) {} + + friend simdjson_really_inline uint64_t operator==(const simd8 lhs, const simd8 rhs) { + return _mm512_cmpeq_epi8_mask(lhs, rhs); + } + + static const int SIZE = sizeof(base::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + // workaround for compilers unable to figure out that 16 - N is a constant (GCC 8) + constexpr int shift = 16 - N; + return _mm512_alignr_epi8(*this, _mm512_permutex2var_epi64(prev_chunk, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), *this), shift); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm512_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m512i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + simdjson_inline bool any() const { return !!_mm512_test_epi8_mask (*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm512_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm512_setzero_si512(); } + static simdjson_inline simd8 load(const T values[64]) { + return _mm512_loadu_si512(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, 
v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m512i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[64]) const { return _mm512_storeu_si512(reinterpret_cast<__m512i *>(dst), *this); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm512_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm512_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm512_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. 
+ template + simdjson_inline void compress(uint64_t mask, L * output) const { + _mm512_mask_compressstoreu_epi8 (output,~mask,*this); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[64]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31, + int8_t v32, int8_t v33, int8_t v34, int8_t v35, int8_t v36, int8_t v37, int8_t v38, int8_t v39, + int8_t v40, int8_t v41, int8_t v42, int8_t v43, int8_t v44, int8_t v45, int8_t v46, int8_t v47, + int8_t v48, int8_t v49, int8_t v50, int8_t v51, int8_t v52, int8_t v53, int8_t v54, int8_t v55, + int8_t v56, int8_t v57, int8_t v58, int8_t v59, int8_t v60, int8_t v61, int8_t v62, int8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, 
v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 + )) {} + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epi8(*this, other); } + + simdjson_inline simd8 operator>(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(*this, other),_mm512_set1_epi8(uint8_t(0x80))); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(other, *this),_mm512_set1_epi8(uint8_t(0x80))); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[64]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, 
uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31, + uint8_t v32, uint8_t v33, uint8_t v34, uint8_t v35, uint8_t v36, uint8_t v37, uint8_t v38, uint8_t v39, + uint8_t v40, uint8_t v41, uint8_t v42, uint8_t v43, uint8_t v44, uint8_t v45, uint8_t v46, uint8_t v47, + uint8_t v48, uint8_t v49, uint8_t v50, uint8_t v51, uint8_t v52, uint8_t v53, uint8_t v54, uint8_t v55, + uint8_t v56, uint8_t v57, uint8_t v58, uint8_t v59, uint8_t v60, uint8_t v61, uint8_t v62, uint8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 + )) {} + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm512_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm512_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epu8(*this, 
other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epu8(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline uint64_t operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline uint64_t operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return _mm512_mask_blend_epi8(*this == uint8_t(0), _mm512_set1_epi8(0), _mm512_set1_epi8(-1)); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + + simdjson_inline bool is_ascii() const { return _mm512_movepi8_mask(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { + return !_mm512_test_epi8_mask(*this, *this); + } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return !_mm512_test_epi8_mask(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm512_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return 
simd8(_mm512_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. value.get_bit<7>() gets the high bit + template + simdjson_inline uint64_t get_bit() const { return _mm512_movepi8_mask(_mm512_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 1, "Icelake kernel should use one register per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const simd8 chunk0) : chunks{chunk0} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(mask, output); + return 64 - count_ones(mask); + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + } + + simdjson_inline simd8 reduce_or() const { + return this->chunks[0]; + } + + simdjson_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] | mask + ); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return this->chunks[0] == mask; + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return this->chunks[0] == other.chunks[0]; + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return this->chunks[0] <= mask; + } + }; // struct simd8x64 + +} // namespace simd + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_SIMD_H +/* end file simdjson/icelake/simd.h */ +/* including 
simdjson/icelake/stringparsing_defs.h: #include "simdjson/icelake/stringparsing_defs.h" */ +/* begin file simdjson/icelake/stringparsing_defs.h */ +#ifndef SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H +#define SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 64; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint64_t bs_bits; + uint64_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 15 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + // store to dest unconditionally - we can overwrite the bits we don't like later + v.store(dst); + return { + static_cast(v == '\\'), // bs_bits + static_cast(v == '"'), // quote_bits + }; +} + +} // unnamed namespace +} // namespace icelake +} // namespace 
simdjson + +#endif // SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H +/* end file simdjson/icelake/stringparsing_defs.h */ +/* including simdjson/icelake/numberparsing_defs.h: #include "simdjson/icelake/numberparsing_defs.h" */ +/* begin file simdjson/icelake/numberparsing_defs.h */ +#ifndef SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H +#define SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace numberparsing { + +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. 
+ const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace icelake +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H +/* end file simdjson/icelake/numberparsing_defs.h */ +/* end file simdjson/icelake/begin.h */ +/* including simdjson/generic/amalgamated.h for icelake: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for icelake */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be 
included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for icelake: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for icelake */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */ +/* amalgamation 
skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word +}; + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for icelake */ +/* including simdjson/generic/jsoncharutils.h for icelake: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for icelake */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or 
whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... 
+ //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for icelake */ +/* including simdjson/generic/atomparsing.h for icelake: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for icelake */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace icelake { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. 
+simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for icelake */ +/* including simdjson/generic/dom_parser_implementation.h for icelake: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file 
simdjson/generic/dom_parser_implementation.h for icelake */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { + +// expectation: sizeof(open_container) = 64/8. +struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) 
noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace icelake +} // namespace simdjson + +namespace simdjson { +namespace icelake { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace 
icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for icelake */ +/* including simdjson/generic/implementation_simdjson_result_base.h for icelake: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for icelake */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. 
+ */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. 
This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for icelake */ +/* including simdjson/generic/numberparsing.h for icelake: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for icelake */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace icelake { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) 
(found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." 
+#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. 
Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. 
+ const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. 
Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without + // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product + // is sufficiently accurate, and more computation is not needed. + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? 
-0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. 
+ // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. 
+ + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! 
+ // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! 
+ if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? 
-exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { + if (parse_float_fallback(src, answer)) { + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! 
+ if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer + // reference to it, it would force it to be stored in memory, preventing the compiler from + // picking it apart and putting into registers. i.e. if we pass it as reference, + // it gets slow. + double d; + error_code error = slow_float_parsing(src, &d); + writer.append_double(d); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. 
+ return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; 
} +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' 
== *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. 
+ // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it does not fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. 
+ if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. 
+ // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. 
+ if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. 
+ if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } + // If the number is negative and valid, it must be a signed integer. + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). 
+ static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. 
+ // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for icelake */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for icelake: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // 
SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} 
+template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for icelake */ +/* end file simdjson/generic/amalgamated.h for icelake */ +/* including simdjson/icelake/end.h: #include "simdjson/icelake/end.h" */ +/* begin file simdjson/icelake/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +SIMDJSON_UNTARGET_REGION +#endif + +/* undefining SIMDJSON_IMPLEMENTATION from "icelake" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/icelake/end.h */ + +#endif // SIMDJSON_ICELAKE_H +/* end file simdjson/icelake.h */ +/* including simdjson/icelake/implementation.h: #include */ +/* begin file simdjson/icelake/implementation.h */ +#ifndef SIMDJSON_ICELAKE_IMPLEMENTATION_H +#define SIMDJSON_ICELAKE_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE +namespace 
simdjson { +namespace icelake { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation( + "icelake", + "Intel/AMD AVX512", + internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 | internal::instruction_set::AVX512F | internal::instruction_set::AVX512DQ | internal::instruction_set::AVX512CD | internal::instruction_set::AVX512BW | internal::instruction_set::AVX512VL | internal::instruction_set::AVX512VBMI2 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_IMPLEMENTATION_H +/* end file simdjson/icelake/implementation.h */ + +// defining SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER allows us to provide our own bit_indexer::write +#define SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +/* including simdjson/icelake/begin.h: #include */ +/* begin file simdjson/icelake/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "icelake" */ +#define SIMDJSON_IMPLEMENTATION icelake +/* including simdjson/icelake/base.h: #include "simdjson/icelake/base.h" */ +/* begin file simdjson/icelake/base.h */ +#ifndef SIMDJSON_ICELAKE_BASE_H +#define SIMDJSON_ICELAKE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take 
care not to use SIMDJSON_TARGET_ICELAKE +namespace simdjson { +/** + * Implementation for Icelake (Intel AVX512). + */ +namespace icelake { + +class implementation; + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BASE_H +/* end file simdjson/icelake/base.h */ +/* including simdjson/icelake/intrinsics.h: #include "simdjson/icelake/intrinsics.h" */ +/* begin file simdjson/icelake/intrinsics.h */ +#ifndef SIMDJSON_ICELAKE_INTRINSICS_H +#define SIMDJSON_ICELAKE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdjson, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. 
+ */ +#include // for _blsr_u64 +#include // for __lzcnt64 +#include // for most things (AVX2, AVX512, _popcnt64) +#include +#include +#include +#include +#include // for _mm_clmulepi64_si128 +// Important: we need the AVX-512 headers: +#include +#include +#include +#include +#include +#include +#include +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. +#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO + +static_assert(sizeof(__m512i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for icelake"); + +#endif // SIMDJSON_ICELAKE_INTRINSICS_H +/* end file simdjson/icelake/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +SIMDJSON_TARGET_REGION("avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2,avx512vl,avx2,bmi,pclmul,lzcnt,popcnt") +#endif + +/* including simdjson/icelake/bitmanipulation.h: #include "simdjson/icelake/bitmanipulation.h" */ +/* begin file simdjson/icelake/bitmanipulation.h */ +#ifndef SIMDJSON_ICELAKE_BITMANIPULATION_H +#define SIMDJSON_ICELAKE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. 
+// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? + //////// + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return _blsr_u64(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return int(_lzcnt_u64(input_num)); +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BITMANIPULATION_H +/* end file simdjson/icelake/bitmanipulation.h */ +/* including simdjson/icelake/bitmask.h: #include "simdjson/icelake/bitmask.h" */ +/* begin file simdjson/icelake/bitmask.h */ +#ifndef SIMDJSON_ICELAKE_BITMASK_H +#define SIMDJSON_ICELAKE_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include 
"simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BITMASK_H +/* end file simdjson/icelake/bitmask.h */ +/* including simdjson/icelake/simd.h: #include "simdjson/icelake/simd.h" */ +/* begin file simdjson/icelake/simd.h */ +#ifndef SIMDJSON_ICELAKE_SIMD_H +#define SIMDJSON_ICELAKE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if defined(__GNUC__) && !defined(__clang__) +#if __GNUC__ == 8 +#define SIMDJSON_GCC8 1 +#endif // __GNUC__ == 8 +#endif // defined(__GNUC__) && !defined(__clang__) + +#if SIMDJSON_GCC8 +/** + * GCC 8 fails to provide _mm512_set_epi8. We roll our own. 
+ */ +inline __m512i _mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) { + return _mm512_set_epi64(uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) + (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) + (uint64_t(a1) << 48) + (uint64_t(a0) << 56), + uint64_t(a15) + (uint64_t(a14) << 8) + (uint64_t(a13) << 16) + (uint64_t(a12) << 24) + (uint64_t(a11) << 32) + (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56), + uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) + (uint64_t(a20) << 24) + (uint64_t(a19) << 32) + (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56), + uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) + (uint64_t(a28) << 24) + (uint64_t(a27) << 32) + (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56), + uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) + (uint64_t(a36) << 24) + (uint64_t(a35) << 32) + (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56), + uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) + (uint64_t(a44) << 24) + (uint64_t(a43) << 32) + (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56), + uint64_t(a55) + 
(uint64_t(a54) << 8) + (uint64_t(a53) << 16) + (uint64_t(a52) << 24) + (uint64_t(a51) << 32) + (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56), + uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) + (uint64_t(a60) << 24) + (uint64_t(a59) << 32) + (uint64_t(a58) << 40) + (uint64_t(a57) << 48) + (uint64_t(a56) << 56)); +} +#endif // SIMDJSON_GCC8 + + + +namespace simdjson { +namespace icelake { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m512i value; + + // Zero constructor + simdjson_inline base() : value{__m512i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m512i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m512i&() const { return this->value; } + simdjson_inline operator __m512i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm512_or_si512(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm512_and_si512(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm512_xor_si512(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm512_andnot_si512(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. 
+ template + struct simd8; + + template> + struct base8: base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m512i _value) : base>(_value) {} + + friend simdjson_really_inline uint64_t operator==(const simd8 lhs, const simd8 rhs) { + return _mm512_cmpeq_epi8_mask(lhs, rhs); + } + + static const int SIZE = sizeof(base::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + // workaround for compilers unable to figure out that 16 - N is a constant (GCC 8) + constexpr int shift = 16 - N; + return _mm512_alignr_epi8(*this, _mm512_permutex2var_epi64(prev_chunk, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), *this), shift); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm512_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m512i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + simdjson_inline bool any() const { return !!_mm512_test_epi8_mask (*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm512_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm512_setzero_si512(); } + static simdjson_inline simd8 load(const T values[64]) { + return _mm512_loadu_si512(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, 
v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m512i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[64]) const { return _mm512_storeu_si512(reinterpret_cast<__m512i *>(dst), *this); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm512_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm512_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm512_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. 
+ template + simdjson_inline void compress(uint64_t mask, L * output) const { + _mm512_mask_compressstoreu_epi8 (output,~mask,*this); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[64]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31, + int8_t v32, int8_t v33, int8_t v34, int8_t v35, int8_t v36, int8_t v37, int8_t v38, int8_t v39, + int8_t v40, int8_t v41, int8_t v42, int8_t v43, int8_t v44, int8_t v45, int8_t v46, int8_t v47, + int8_t v48, int8_t v49, int8_t v50, int8_t v51, int8_t v52, int8_t v53, int8_t v54, int8_t v55, + int8_t v56, int8_t v57, int8_t v58, int8_t v59, int8_t v60, int8_t v61, int8_t v62, int8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, 
v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 + )) {} + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epi8(*this, other); } + + simdjson_inline simd8 operator>(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(*this, other),_mm512_set1_epi8(uint8_t(0x80))); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(other, *this),_mm512_set1_epi8(uint8_t(0x80))); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[64]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, 
uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31, + uint8_t v32, uint8_t v33, uint8_t v34, uint8_t v35, uint8_t v36, uint8_t v37, uint8_t v38, uint8_t v39, + uint8_t v40, uint8_t v41, uint8_t v42, uint8_t v43, uint8_t v44, uint8_t v45, uint8_t v46, uint8_t v47, + uint8_t v48, uint8_t v49, uint8_t v50, uint8_t v51, uint8_t v52, uint8_t v53, uint8_t v54, uint8_t v55, + uint8_t v56, uint8_t v57, uint8_t v58, uint8_t v59, uint8_t v60, uint8_t v61, uint8_t v62, uint8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 + )) {} + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm512_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm512_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epu8(*this, 
other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epu8(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline uint64_t operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline uint64_t operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return _mm512_mask_blend_epi8(*this == uint8_t(0), _mm512_set1_epi8(0), _mm512_set1_epi8(-1)); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + + simdjson_inline bool is_ascii() const { return _mm512_movepi8_mask(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { + return !_mm512_test_epi8_mask(*this, *this); + } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return !_mm512_test_epi8_mask(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm512_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return 
simd8(_mm512_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. value.get_bit<7>() gets the high bit + template + simdjson_inline uint64_t get_bit() const { return _mm512_movepi8_mask(_mm512_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 1, "Icelake kernel should use one register per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const simd8 chunk0) : chunks{chunk0} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(mask, output); + return 64 - count_ones(mask); + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + } + + simdjson_inline simd8 reduce_or() const { + return this->chunks[0]; + } + + simdjson_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] | mask + ); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return this->chunks[0] == mask; + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return this->chunks[0] == other.chunks[0]; + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return this->chunks[0] <= mask; + } + }; // struct simd8x64 + +} // namespace simd + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_SIMD_H +/* end file simdjson/icelake/simd.h */ +/* including 
simdjson/icelake/stringparsing_defs.h: #include "simdjson/icelake/stringparsing_defs.h" */ +/* begin file simdjson/icelake/stringparsing_defs.h */ +#ifndef SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H +#define SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 64; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint64_t bs_bits; + uint64_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 15 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + // store to dest unconditionally - we can overwrite the bits we don't like later + v.store(dst); + return { + static_cast(v == '\\'), // bs_bits + static_cast(v == '"'), // quote_bits + }; +} + +} // unnamed namespace +} // namespace icelake +} // namespace 
simdjson + +#endif // SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H +/* end file simdjson/icelake/stringparsing_defs.h */ +/* including simdjson/icelake/numberparsing_defs.h: #include "simdjson/icelake/numberparsing_defs.h" */ +/* begin file simdjson/icelake/numberparsing_defs.h */ +#ifndef SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H +#define SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace numberparsing { + +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. 
+ const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace icelake +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H +/* end file simdjson/icelake/numberparsing_defs.h */ +/* end file simdjson/icelake/begin.h */ +/* including generic/amalgamated.h for icelake: #include */ +/* begin file generic/amalgamated.h for icelake */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_SRC_GENERIC_DEPENDENCIES_H) +#error generic/dependencies.h must be included before generic/amalgamated.h! 
+#endif + +/* including generic/base.h for icelake: #include */ +/* begin file generic/base.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +struct json_character_block; + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_BASE_H +/* end file generic/base.h for icelake */ +/* including generic/dom_parser_implementation.h for icelake: #include */ +/* begin file generic/dom_parser_implementation.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// Interface a dom parser implementation must fulfill +namespace simdjson { +namespace icelake { +namespace { + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3); +simdjson_inline bool is_ascii(const simd8x64& input); + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file generic/dom_parser_implementation.h for icelake */ +/* including generic/json_character_block.h for icelake: #include */ +/* begin file generic/json_character_block.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H + +/* amalgamation skipped (editor-only): 
#ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H */
+/* amalgamation skipped (editor-only): #include */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace icelake {
+namespace {
+
+/**
+ * Result of classifying one block of input: a whitespace mask and an operator mask.
+ *
+ * Both masks are stored as plain 64-bit integers; the scalar mask is derived from
+ * them on demand rather than stored.
+ * NOTE(review): presumably one bit per byte of a 64-byte block -- confirm against the
+ * definition of classify(), which is only declared here.
+ */
+struct json_character_block {
+  // Builds a block from the input; declaration only, the definition is not in this header.
+  static simdjson_inline json_character_block classify(const simd::simd8x64& in);
+
+  // Mask of whitespace characters.
+  simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; }
+  // Mask of operator characters.
+  simdjson_inline uint64_t op() const noexcept { return _op; }
+  // Everything that is neither an operator nor whitespace.
+  simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); }
+
+  uint64_t _whitespace;
+  uint64_t _op;
+};
+
+} // unnamed namespace
+} // namespace icelake
+} // namespace simdjson
+
+#endif // SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H
+/* end file generic/json_character_block.h for icelake */
+/* end file generic/amalgamated.h for icelake */
+/* including generic/stage1/amalgamated.h for icelake: #include */
+/* begin file generic/stage1/amalgamated.h for icelake */
+// Stuff other things depend on
+/* including generic/stage1/base.h for icelake: #include */
+/* begin file generic/stage1/base.h for icelake */
+#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BASE_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BASE_H */
+/* amalgamation skipped (editor-only): #include */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace icelake {
+namespace {
+namespace stage1 {
+
+// Forward declarations of the stage 1 types.
+class bit_indexer;
+// NOTE(review): `template` has no parameter list in this copy of the patch; the `<...>`
+// text looks stripped -- confirm against the upstream amalgamation.
+template
+struct buf_block_reader;
+struct json_block;
+class json_minifier;
+class json_scanner;
+struct json_string_block;
+class json_string_scanner;
+class json_structural_indexer;
+
+} // namespace stage1
+
+namespace utf8_validation {
+struct utf8_checker;
+} // namespace utf8_validation
+
+using
utf8_validation::utf8_checker;
+
+} // unnamed namespace
+} // namespace icelake
+} // namespace simdjson
+
+#endif // SIMDJSON_SRC_GENERIC_STAGE1_BASE_H
+/* end file generic/stage1/base.h for icelake */
+/* including generic/stage1/buf_block_reader.h for icelake: #include */
+/* begin file generic/stage1/buf_block_reader.h for icelake */
+#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H */
+/* amalgamation skipped (editor-only): #include */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+// NOTE(review): the header name after `#include` is missing in this copy of the patch.
+// std::memset / std::memcpy are used below, so presumably <cstring> -- confirm upstream.
+#include
+
+namespace simdjson {
+namespace icelake {
+namespace {
+namespace stage1 {
+
+// Walks through a buffer in block-sized increments, loading the last part with spaces
+// NOTE(review): every `template` in this header has lost its parameter list in this copy
+// (STEP_SIZE is used but never declared); the `<...>` text looks stripped -- confirm upstream.
+template
+struct buf_block_reader {
+public:
+  simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len);
+  /** Current read offset into the buffer (idx). */
+  simdjson_inline size_t block_index();
+  /** True while idx < lenminusstep, i.e. while more than STEP_SIZE bytes remain. */
+  simdjson_inline bool has_full_block() const;
+  /** Pointer to the block at the current offset (&buf[idx]); no bounds check is done here. */
+  simdjson_inline const uint8_t *full_block() const;
+  /**
+   * Get the last block, padded with spaces.
+   *
+   * There will always be a last block, with at least 1 byte, unless nothing is left to read
+   * (len == idx, e.g. len == 0), in which case this function returns 0 and leaves dst
+   * untouched. In particular, if len == STEP_SIZE there
+   * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding.
+   *
+   * @param dst destination buffer; STEP_SIZE bytes are written to it when anything is left.
+   * @return the number of effective characters in the last block.
+   */
+  simdjson_inline size_t get_remainder(uint8_t *dst) const;
+  /** Moves the read offset forward by STEP_SIZE. */
+  simdjson_inline void advance();
+private:
+  const uint8_t *buf;
+  const size_t len;
+  // len - STEP_SIZE, clamped to 0 when len < STEP_SIZE (see the constructor).
+  const size_t lenminusstep;
+  size_t idx;
+};
+
+// Routines to print masks and text for debugging bitmask operations
+//
+// All format_* helpers below return a pointer to a function-local static buffer, so each
+// call overwrites the result of the previous call to the same helper.
+// NOTE(review): the loop headers `for (size_t i=0; i); i++)` are garbled in this copy of
+// the patch (the `<...` comparison looks stripped) -- confirm against upstream.
+//
+// Bytes that compare below ' ' after the int8_t cast (control characters and bytes >= 0x80)
+// are shown as '_'.
+simdjson_unused static char * format_input_text_64(const uint8_t *text) {
+  static char buf[sizeof(simd8x64) + 1];
+  for (size_t i=0; i); i++) {
+    buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]);
+  }
+  buf[sizeof(simd8x64)] = '\0';
+  return buf;
+}
+
+// Routines to print masks and text for debugging bitmask operations
+// Same as above, but reads the text out of a simd8x64; characters below ' ' become '_'.
+simdjson_unused static char * format_input_text(const simd8x64& in) {
+  static char buf[sizeof(simd8x64) + 1];
+  in.store(reinterpret_cast(buf));
+  for (size_t i=0; i); i++) {
+    if (buf[i] < ' ') { buf[i] = '_'; }
+  }
+  buf[sizeof(simd8x64)] = '\0';
+  return buf;
+}
+
+// Masked variant: characters <= ' ' (so spaces too) become '_', then every position whose
+// bit is clear in mask is blanked to ' '.
+simdjson_unused static char * format_input_text(const simd8x64& in, uint64_t mask) {
+  static char buf[sizeof(simd8x64) + 1];
+  in.store(reinterpret_cast(buf));
+  for (size_t i=0; i); i++) {
+    if (buf[i] <= ' ') { buf[i] = '_'; }
+    if (!(mask & (size_t(1) << i))) { buf[i] = ' '; }
+  }
+  buf[sizeof(simd8x64)] = '\0';
+  return buf;
+}
+
+// Renders a 64-bit mask as 64 characters: 'X' for a set bit, ' ' for a clear bit, bit 0 first.
+simdjson_unused static char * format_mask(uint64_t mask) {
+  static char buf[sizeof(simd8x64) + 1];
+  for (size_t i=0; i<64; i++) {
+    buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
+  }
+  buf[64] = '\0';
+  return buf;
+}
+
+// lenminusstep is clamped to 0 so that the subtraction cannot wrap when len < STEP_SIZE.
+template
+simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
+
+template
+simdjson_inline size_t buf_block_reader::block_index() { return idx; }
+
+// Strict comparison: a final chunk of exactly STEP_SIZE bytes is NOT a full block; it is
+// handed out by get_remainder() instead.
+template
+simdjson_inline bool buf_block_reader::has_full_block() const {
+  return idx < lenminusstep;
+}
+
+template
+simdjson_inline const uint8_t *buf_block_reader::full_block() const {
+  return &buf[idx];
+}
+
+// Copies the len - idx unread bytes into dst after pre-filling dst with spaces (0x20).
+// Returns early, without touching dst, when nothing is left.
+template
+simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const {
+  if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers
+  std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once.
+  std::memcpy(dst, buf + idx, len - idx);
+  return len - idx;
+}
+
+template
+simdjson_inline void buf_block_reader::advance() {
+  idx += STEP_SIZE;
+}
+
+} // namespace stage1
+} // unnamed namespace
+} // namespace icelake
+} // namespace simdjson
+
+#endif // SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H
+/* end file generic/stage1/buf_block_reader.h for icelake */
+/* including generic/stage1/json_escape_scanner.h for icelake: #include */
+/* begin file generic/stage1/json_escape_scanner.h for icelake */
+#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H */
+/* amalgamation skipped (editor-only): #include */
+/* amalgamation skipped (editor-only): #include */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace icelake {
+namespace {
+namespace stage1 {
+
+/**
+ * Scans for escape characters in JSON, taking care with multiple backslashes (\\n vs. \n).
+ */
+struct json_escape_scanner {
+  /**
+   * Carry between blocks: next() stores bit 63 of the block's `escape` mask here (0 or 1),
+   * i.e. whether the first character of the following block is escaped.
+   */
+  uint64_t next_is_escaped = 0ULL;
+
+  struct escaped_and_escape {
+    /**
+     * Mask of escaped characters.
+     *
+     * ```
+     * \n \\n \\\n \\\\n \
+     * 0100100010100101000
+     *  n  \   \ n  \ \
+     * ```
+     */
+    uint64_t escaped;
+    /**
+     * Mask of escape characters.
+     *
+     * ```
+     * \n \\n \\\n \\\\n \
+     * 1001000101001010001
+     * \  \   \ \  \ \   \
+     * ```
+     */
+    uint64_t escape;
+  };
+
+  /**
+   * Get a mask of both escape and escaped characters (the characters following a backslash).
+   *
+   * Updates next_is_escaped as a side effect, so blocks must be fed in order.
+   *
+   * @param backslash A mask of the character that can escape others (but could be
+   *        escaped itself). e.g. block.eq('\\')
+   * @return the {escaped, escape} masks for this block.
+   */
+  simdjson_really_inline escaped_and_escape next(uint64_t backslash) noexcept {
+
+#if !SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT
+    // No backslash in this block: only the carried-in bit can be escaped.
+    if (!backslash) { return {next_escaped_without_backslashes(), 0}; }
+#endif
+
+    // NOTE(review): in the table below, "potential_escape" is `backslash & ~next_is_escaped`
+    // and "first_is_escaped" is `next_is_escaped`. Column alignment was lost in this copy.
+    // | | Mask (shows characters instead of 1's) | Depth | Instructions |
+    // |--------------------------------|----------------------------------------|-------|---------------------|
+    // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | | |
+    // | | ` even odd even odd odd` | | |
+    // | potential_escape | ` \ \\\ \\\ \\\\ \\\\ \\\` | 1 | 1 (backslash & ~first_is_escaped)
+    // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 5 | 5 (next_escape_and_terminal_code())
+    // | escaped | `\ \ n \ n \ \ \ \ \ ` X | 6 | 7 (escape_and_terminal_code ^ (potential_escape | first_is_escaped))
+    // | escape | ` \ \ \ \ \ \ \ \ \ \` | 6 | 8 (escape_and_terminal_code & backslash)
+    // | first_is_escaped | `\ ` | 7 (*) | 9 (escape >> 63) ()
+    // (*) this is not needed until the next iteration
+    uint64_t escape_and_terminal_code = next_escape_and_terminal_code(backslash & ~this->next_is_escaped);
+    uint64_t escaped = escape_and_terminal_code ^ (backslash | this->next_is_escaped);
+    uint64_t escape = escape_and_terminal_code & backslash;
+    // Bit 63 of escape becomes the carry into the next block.
+    this->next_is_escaped = escape >> 63;
+    return {escaped, escape};
+  }
+
+private:
+  // Every second bit set, starting at bit 1 (binary ...1010).
+  static constexpr const uint64_t ODD_BITS = 0xAAAAAAAAAAAAAAAAULL;
+
+  // Fast path for a block without backslashes: returns the carried-in bit and clears it.
+  simdjson_really_inline uint64_t next_escaped_without_backslashes() noexcept {
+    uint64_t escaped = this->next_is_escaped;
+    this->next_is_escaped = 0;
+    return escaped;
+  }
+
+  /**
+   * Returns a mask of the next escape characters (masking out escaped backslashes), along with
+   * any non-backslash escape codes.
+   *
+   * \n \\n \\\n \\\\n returns:
+   * \n \ \ \n \ \
+   * 11 100 1011 10100
+   *
+   * You are expected to mask out the first bit yourself if the previous block had a trailing
+   * escape.
+   *
+   * & the result with potential_escape to get just the escape characters.
+   * ^ the result with (potential_escape | first_is_escaped) to get escaped characters.
+   */
+  static simdjson_really_inline uint64_t next_escape_and_terminal_code(uint64_t potential_escape) noexcept {
+    // If we were to just shift and mask out any odd bits, we'd actually get a *half* right answer:
+    // any even-aligned backslash runs would be correct! Odd-aligned backslash runs would be
+    // inverted (\\\ would be 010 instead of 101).
+    //
+    // ```
+    // string: | ____\\\\_\\\\_____ |
+    // maybe_escaped | ODD | \ \ \ \ |
+    // even-aligned ^^^ ^^^^ odd-aligned
+    // ```
+    //
+    // Taking that into account, our basic strategy is:
+    //
+    // 1. Use subtraction to produce a mask with 1's for even-aligned runs and 0's for
+    //    odd-aligned runs.
+    // 2. XOR all odd bits, which masks out the odd bits in even-aligned runs, and brings IN the
+    //    odd bits in odd-aligned runs.
+    // 3. & with backslash to clean up any stray bits.
+    // runs are set to 0, and then XORing with "odd":
+    //
+    // | | Mask (shows characters instead of 1's) | Instructions |
+    // |--------------------------------|----------------------------------------|---------------------|
+    // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` |
+    // | | ` even odd even odd odd` |
+    // | maybe_escaped | ` n \\n \\n \\\_ \\\_ \\` X | 1 (potential_escape << 1)
+    // | maybe_escaped_and_odd | ` \n_ \\n _ \\\n_ _ \\\__ _\\\_ \\\` | 1 (maybe_escaped | odd)
+    // | even_series_codes_and_odd | ` n_\\\ _ n_ _\\\\ _ _ ` | 1 (maybe_escaped_and_odd - potential_escape)
+    // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 1 (^ odd)
+    //
+
+    // Escaped characters are characters following an escape.
+    uint64_t maybe_escaped = potential_escape << 1;
+
+    // To distinguish odd from even escape sequences, therefore, we turn on any *starting*
+    // escapes that are on an odd byte. (We actually bring in all odd bits, for speed.)
+    // - Odd runs of backslashes are 0000, and the code at the end ("n" in \n or \\n) is 1.
+    // - Even runs of backslashes are 1111, and the code at the end ("n" in \n or \\n) is 0.
+    // - All other odd bytes are 1, and even bytes are 0.
+    uint64_t maybe_escaped_and_odd_bits = maybe_escaped | ODD_BITS;
+    uint64_t even_series_codes_and_odd_bits = maybe_escaped_and_odd_bits - potential_escape;
+
+    // Now we flip all odd bytes back with xor. This:
+    // - Makes odd runs of backslashes go from 0000 to 1010
+    // - Makes even runs of backslashes go from 1111 to 1010
+    // - Sets actually-escaped codes to 1 (the n in \n and \\n: \n = 11, \\n = 100)
+    // - Resets all other bytes to 0
+    return even_series_codes_and_odd_bits ^ ODD_BITS;
+  }
+};
+
+} // namespace stage1
+} // unnamed namespace
+} // namespace icelake
+} // namespace simdjson
+
+#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H
+/* end file generic/stage1/json_escape_scanner.h for icelake */
+/* including generic/stage1/json_string_scanner.h for icelake: #include */
+/* begin file generic/stage1/json_string_scanner.h for icelake */
+#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H */
+/* amalgamation skipped (editor-only): #include */
+/* amalgamation skipped (editor-only): #include */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace icelake {
+namespace {
+namespace stage1 {
+
+// Three masks describing the strings found in one block: escaped characters, real quotes,
+// and the in-string region. All accessors are pure functions of these three masks.
+struct json_string_block {
+  // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017
+  simdjson_really_inline json_string_block(uint64_t escaped, uint64_t quote, uint64_t in_string) :
+  _escaped(escaped), _quote(quote), _in_string(in_string) {}
+
+  // Escaped characters (characters following an escape() character)
+  simdjson_really_inline uint64_t escaped() const { return _escaped; }
+  // Real (non-backslashed) quotes
+  simdjson_really_inline uint64_t quote() const { return _quote; }
+  // Only characters inside the string (not including the quotes)
+  simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; }
+  // Return a mask of whether the given characters are inside a string (only works on non-quotes)
+  simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; }
+  // Return a mask of whether the given characters are outside a string (only works on non-quotes)
+  simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; }
+  // Tail of string (everything except the start quote)
+  simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; }
+
+  // escaped characters (backslashed--does not include the hex characters after \u)
+  uint64_t _escaped;
+  // real quotes (non-escaped ones)
+  uint64_t _quote;
+  // string characters (includes start quote but not end quote)
+  uint64_t _in_string;
+};
+
+// Scans blocks for string characters, storing the state necessary to do so
+class json_string_scanner {
+public:
+  simdjson_really_inline json_string_block next(const simd::simd8x64& in);
+  // Returns either UNCLOSED_STRING or SUCCESS
+  simdjson_really_inline error_code finish();
+
+private:
+  // Scans for escape characters
+  json_escape_scanner escape_scanner{};
+  // Whether the last iteration was still inside a string (all 1's = true, all 0's = false).
+  uint64_t prev_in_string = 0ULL;
+};
+
+//
+// Return a mask of all string characters plus end quotes.
+//
+// escape_scanner carries over whether the first character of the next block is escaped.
+// prev_in_string is overflow saying whether we're still in a string.
+//
+// Backslash sequences outside of quotes will be detected in stage 2.
+//
+simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) {
+  const uint64_t backslash = in.eq('\\');
+  const uint64_t escaped = escape_scanner.next(backslash).escaped;
+  // A quote only counts when it is not itself escaped.
+  const uint64_t quote = in.eq('"') & ~escaped;
+
+  //
+  // prefix_xor flips on bits inside the string (and flips off the end quote).
+  //
+  // Then we xor with prev_in_string: if we were in a string already, its effect is flipped
+  // (characters inside strings are outside, and characters outside strings are inside).
+  //
+  const uint64_t in_string = prefix_xor(quote) ^ prev_in_string;
+
+  //
+  // Check if we're still in a string at the end of the box so the next block will know
+  //
+  // NOTE(review): the static_cast target type is missing in this copy of the patch. The
+  // all-ones / all-zeros contract documented on prev_in_string implies a signed 64-bit
+  // type (arithmetic shift) -- confirm against upstream.
+  prev_in_string = uint64_t(static_cast(in_string) >> 63);
+
+  // Use ^ to turn the beginning quote off, and the end quote on.
+
+  // We are returning a function-local object so either we get a move constructor
+  // or we get copy elision.
+  return json_string_block(escaped, quote, in_string);
+}
+
+// A string is unclosed when the last block processed ended while still inside a string.
+simdjson_really_inline error_code json_string_scanner::finish() {
+  if (prev_in_string) {
+    return UNCLOSED_STRING;
+  }
+  return SUCCESS;
+}
+
+} // namespace stage1
+} // unnamed namespace
+} // namespace icelake
+} // namespace simdjson
+
+#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H
+/* end file generic/stage1/json_string_scanner.h for icelake */
+/* including generic/stage1/utf8_lookup4_algorithm.h for icelake: #include */
+/* begin file generic/stage1/utf8_lookup4_algorithm.h for icelake */
+#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H */
+/* amalgamation skipped (editor-only): #include */
+/* amalgamation skipped (editor-only): #include */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace icelake {
+namespace {
+namespace utf8_validation {
+
+using namespace simd;
+
+  // Classifies each (prev1, input) byte pair with three 16-entry lookups -- high nibble of
+  // prev1, low nibble of prev1, high nibble of input -- and ANDs the results: a bit survives
+  // only if all three nibbles are consistent with that error class.
+  // NOTE(review): `simd8` / `lookup_16` appear without template arguments in this copy of
+  // the patch; the `<...>` text looks stripped -- confirm against upstream.
+  simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) {
+// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII)
+// Bit 1 = Too Long (ASCII followed by continuation)
+// Bit 2 = Overlong 3-byte
+// Bit 3 = Too Large
+// Bit 4 = Surrogate
+// Bit 5 = Overlong 2-byte
+// Bit 6 = Too Large (second byte 1000____) / Overlong 4-byte (both constants share this bit)
+// Bit 7 = Two Continuations
+    constexpr const uint8_t TOO_SHORT   = 1<<0; // 11______ 0_______
+                                                // 11______ 11______
+    constexpr const uint8_t TOO_LONG    = 1<<1; // 0_______ 10______
+    constexpr const uint8_t OVERLONG_3  = 1<<2; // 11100000 100_____
+    constexpr const uint8_t SURROGATE   = 1<<4; // 11101101 101_____
+    constexpr const uint8_t OVERLONG_2  = 1<<5; // 1100000_ 10______
+    constexpr const uint8_t TWO_CONTS   = 1<<7; // 10______ 10______
+    constexpr const uint8_t TOO_LARGE   = 1<<3; // 11110100 1001____
+                                                // 11110100 101_____
+                                                // 11110101 1001____
+                                                // 11110101 101_____
+                                                // 1111011_ 1001____
+                                                // 1111011_ 101_____
+                                                // 11111___ 1001____
+                                                // 11111___ 101_____
+    constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
+                                                // 11110101 1000____
+                                                // 1111011_ 1000____
+                                                // 11111___ 1000____
+    constexpr const uint8_t OVERLONG_4  = 1<<6; // 11110000 1000____
+
+    const simd8 byte_1_high = prev1.shr<4>().lookup_16(
+      // 0_______ ________
+      TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
+      TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
+      // 10______ ________
+      TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS,
+      // 1100____ ________
+      TOO_SHORT | OVERLONG_2,
+      // 1101____ ________
+      TOO_SHORT,
+      // 1110____ ________
+      TOO_SHORT | OVERLONG_3 | SURROGATE,
+      // 1111____ ________
+      TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4
+    );
+    constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
+    const simd8 byte_1_low = (prev1 & 0x0F).lookup_16(
+      // ____0000 ________
+      CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4,
+      // ____0001 ________
+      CARRY | OVERLONG_2,
+      // ____001_ ________
+      CARRY,
+      CARRY,
+
+      // ____0100 ________
+      CARRY | TOO_LARGE,
+      // ____0101 ________
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      // ____011_ ________
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+
+      // ____1___ ________
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      // ____1101 ________
+      CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000
+    );
+    const simd8 byte_2_high = input.shr<4>().lookup_16(
+      // ________ 0_______
+      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
+      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
+
+      // ________ 1000____
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4,
+      // ________ 1001____
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE,
+      // ________ 101_____
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE  | TOO_LARGE,
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE  | TOO_LARGE,
+
+      // ________ 11______
+      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT
+    );
+    return (byte_1_high & byte_1_low & byte_2_high);
+  }
+  // Combines the special-case flags `sc` with the positions where a continuation byte is
+  // required two or three bytes after a lead (must_be_2_3_continuation, declared elsewhere).
+  // Only the 0x80 bit of that result is kept -- the same bit value as TWO_CONTS in
+  // check_special_cases -- and XORed into sc, so the two either cancel or raise an error.
+  simdjson_inline simd8 check_multibyte_lengths(const simd8 input,
+      const simd8 prev_input, const simd8 sc) {
+    simd8 prev2 = input.prev<2>(prev_input);
+    simd8 prev3 = input.prev<3>(prev_input);
+    simd8 must23 = must_be_2_3_continuation(prev2, prev3);
+    simd8 must23_80 = must23 & uint8_t(0x80);
+    return must23_80 ^ sc;
+  }
+
+  //
+  // Return nonzero if there are incomplete multibyte characters at the end of the block:
+  // e.g. if there is a 4-byte character, but it's 3 bytes from the end.
+  //
+  // NOTE(review): `simd8` / `simd8x64` appear without template arguments throughout this
+  // copy of the patch; the `<...>` text looks stripped -- confirm against upstream.
+  simdjson_inline simd8 is_incomplete(const simd8 input) {
+    // If the previous input's last 3 bytes match this, they're too short (they ended at EOF):
+    // ... 1111____ 111_____ 11______
+    // The table is 255 everywhere except its last three entries (0xf0-1, 0xe0-1, 0xc0-1);
+    // max_value is loaded from the final sizeof(simd8) bytes of it, so only the last three
+    // byte positions of the input can exceed their limit.
+#if SIMDJSON_IMPLEMENTATION_ICELAKE
+    static const uint8_t max_array[64] = {
+      255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1
+    };
+#else
+    static const uint8_t max_array[32] = {
+      255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1
+    };
+#endif
+    const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]);
+    // NOTE(review): presumably gt_bits yields nonzero lanes where input > max_value -- confirm.
+    return input.gt_bits(max_value);
+  }
+
+  // Incremental UTF-8 validator. Feed blocks in order through check_next_input(), call
+  // check_eof() once after the last block, then read the verdict with errors().
+  // Errors are sticky: they are only ever OR-ed into `error`.
+  struct utf8_checker {
+    // If this is nonzero, there has been a UTF-8 error.
+    simd8 error;
+    // The last input we received
+    simd8 prev_input_block;
+    // Whether the last input we received was incomplete (used for ASCII fast path)
+    simd8 prev_incomplete;
+
+    //
+    // Check whether the current bytes are valid UTF-8.
+    //
+    // prev_input is the chunk immediately before input; it supplies the bytes that
+    // precede input's first bytes.
+    //
+    simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) {
+      // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes
+      // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers)
+      simd8 prev1 = input.prev<1>(prev_input);
+      simd8 sc = check_special_cases(input, prev1);
+      this->error |= check_multibyte_lengths(input, prev_input, sc);
+    }
+
+    // The only problem that can happen at EOF is that a multibyte character is too short
+    // or a byte value too large in the last bytes: check_special_cases only checks for bytes
+    // too large in the first of two bytes.
+    simdjson_inline void check_eof() {
+      // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't
+      // possibly finish them.
+      this->error |= this->prev_incomplete;
+    }
+
+    // Validates one 64-byte block. An all-ASCII block takes the fast path: the only possible
+    // error is a multibyte character left unfinished by the previous block. In that path
+    // prev_incomplete and prev_input_block are left unchanged.
+    simdjson_inline void check_next_input(const simd8x64& input) {
+      if(simdjson_likely(is_ascii(input))) {
+        this->error |= this->prev_incomplete;
+      } else {
+        // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
+        static_assert((simd8x64::NUM_CHUNKS == 1)
+                ||(simd8x64::NUM_CHUNKS == 2)
+                || (simd8x64::NUM_CHUNKS == 4),
+                "We support one, two or four chunks per 64-byte block.");
+        // Each chunk is checked against the chunk before it; the first chunk is checked
+        // against the last chunk of the previous block.
+        SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) {
+          this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
+        } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) {
+          this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
+          this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+        } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) {
+          this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
+          this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+          this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
+          this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
+        }
+        // Remember the tail of this block for the next call.
+        this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]);
+        this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1];
+      }
+    }
+    // do not forget to call check_eof!
+    // Returns UTF8_ERROR if any error bit has been recorded so far, SUCCESS otherwise.
+    simdjson_inline error_code errors() {
+      return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS;
+    }
+
+  }; // struct utf8_checker
+} // namespace utf8_validation
+
+} // unnamed namespace
+} // namespace icelake
+} // namespace simdjson
+
+#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H
+/* end file generic/stage1/utf8_lookup4_algorithm.h for icelake */
+/* including generic/stage1/json_scanner.h for icelake: #include */
+/* begin file generic/stage1/json_scanner.h for icelake */
+#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H */
+/* amalgamation skipped (editor-only): #include */
+/* amalgamation skipped (editor-only): #include */
+/* amalgamation skipped (editor-only): #include */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace icelake {
+namespace {
+namespace stage1 {
+
+/**
+ * A block of scanned json, with information on operators and scalars.
+ *
+ * We seek to identify pseudo-structural characters. Anything that is inside
+ * a string must be omitted (hence  & ~_string.string_tail()).
+ * Otherwise, pseudo-structural characters come in two forms.
+ * 1. We have the structural characters ([,],{,},:, comma). The
+ *    term 'structural character' is from the JSON RFC.
+ * 2. We have the 'scalar pseudo-structural characters'.
+ *    Scalars are quotes, and any character except structural characters and white space.
+ *
+ * To identify the scalar pseudo-structural characters, we must look at what comes
+ * before them: it must be a space, a quote or a structural character.
+ * Starting with simdjson v0.3, we identify them by
+ * negation: we identify everything that is followed by a non-quote scalar,
+ * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'.
+ */
+struct json_block {
+public:
+  // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017
+  // Two overloads: one moves the string block in, the other copies it.
+  simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) :
+  _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {}
+  simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) :
+  _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {}
+
+  /**
+   * The start of structurals.
+   * In simdjson prior to v0.3, these were called the pseudo-structural characters.
+   *
+   * Computed as the potential structural starts with the string tail masked out.
+   **/
+  simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); }
+  /** All JSON whitespace (i.e. not in a string) */
+  simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); }
+
+  // Helpers
+
+  /** Whether the given characters are inside a string (only works on non-quotes) */
+  simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); }
+  /** Whether the given characters are outside a string (only works on non-quotes) */
+  simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); }
+
+  // string and escape characters
+  json_string_block _string;
+  // whitespace, structural characters ('operators'), scalars
+  json_character_block _characters;
+  // mask of characters whose previous character was a non-quote scalar
+  uint64_t _follows_potential_nonquote_scalar;
+private:
+  // Potential structurals (i.e. disregarding strings)
+
+  /**
+   * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc".
+   * They may reside inside a string.
+   **/
+  simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); }
+  /**
+   * The start of non-operator runs, like 123, true and "abc".
+   * It may reside inside a string.
+   **/
+  simdjson_inline uint64_t potential_scalar_start() const noexcept {
+    // The term "scalar" refers to anything except structural characters and white space
+    // (so letters, numbers, quotes).
+    // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space
+    // then we know that it is irrelevant structurally.
+    // i.e. keep only the scalar characters that do not directly follow a non-quote scalar.
+    return _characters.scalar() & ~follows_potential_scalar();
+  }
+  /**
+   * Whether the given character is immediately after a non-operator like 123, true.
+   * The characters following a quote are not included.
+   */
+  simdjson_inline uint64_t follows_potential_scalar() const noexcept {
+    // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character
+    // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a
+    // white space.
+    // It is understood that within quoted region, anything at all could be marked (irrelevant).
+    return _follows_potential_nonquote_scalar;
+  }
+};
+
+/**
+ * Scans JSON for important bits: structural characters or 'operators', strings, and scalars.
+ *
+ * The scanner starts by calculating two distinct things:
+ * - string characters (taking \" into account)
+ * - structural characters or 'operators' ([]{},:, comma)
+ *   and scalars (runs of non-operators like 123, true and "abc")
+ *
+ * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel:
+ * in particular, the operator/scalar bit will find plenty of things that are actually part of
+ * strings. When we're done, json_block will fuse the two together by masking out tokens that are
+ * part of a string.
+ */ +class json_scanner { +public: + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_inline error_code finish(); + +private: + // Whether the last character of the previous iteration is part of a scalar token + // (anything except whitespace or a structural character/'operator'). + uint64_t prev_scalar = 0ULL; + json_string_scanner string_scanner{}; +}; + + +// +// Check if the current character immediately follows a matching character. +// +// For example, this checks for quotes with backslashes in front of them: +// +// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); +// +simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { + const uint64_t result = match << 1 | overflow; + overflow = match >> 63; + return result; +} + +simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { + json_string_block strings = string_scanner.next(in); + // identifies the white-space and the structural characters + json_character_block characters = json_character_block::classify(in); + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). + // + // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) + // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential + // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we + // may need to add an extra check when parsing strings. + // + // Performance: there are many ways to skin this cat. 
+ const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); + uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_block( + strings,// strings is a function-local object so either it moves or the copy is elided. + characters, + follows_nonquote_scalar + ); +} + +/** Forwards the end-of-input status reported by the string scanner. */ +simdjson_inline error_code json_scanner::finish() { + return string_scanner.finish(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H +/* end file generic/stage1/json_scanner.h for icelake */ + +// All other declarations +/* including generic/stage1/find_next_document_index.h for icelake: #include */ +/* begin file generic/stage1/find_next_document_index.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { +namespace stage1 { + +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end.
We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! + if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; it starts at structural i, so that position is returned + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document.
+ switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } + return 0; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H +/* end file generic/stage1/find_next_document_index.h for icelake */ +/* including generic/stage1/json_minifier.h for icelake: #include */ +/* begin file generic/stage1/json_minifier.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgamation) + +namespace simdjson { +namespace icelake { +namespace { +namespace stage1 { + +class json_minifier { +public: + template + static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; + +private: + simdjson_inline json_minifier(uint8_t *_dst) + : dst{_dst} + {} + template + simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + json_scanner
scanner{}; + uint8_t *dst; +}; + +simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + uint64_t mask = block.whitespace(); + dst += in.compress(mask, dst); +} + +simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { + error_code error = scanner.finish(); + if (error) { dst_len = 0; return error; } + dst_len = dst - dst_start; + return SUCCESS; +} + +template<> +simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + simd::simd8x64 in_2(block_buf+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1); + this->next(in_2, block_2); + reader.advance(); +} + +template<> +simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + json_block block_1 = scanner.next(in_1); + this->next(block_buf, block_1); + reader.advance(); +} + +template +error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { + buf_block_reader reader(buf, len); + json_minifier minifier(dst); + + // Minify the first n-1 blocks + while (reader.has_full_block()) { + minifier.step(reader.full_block(), reader); + } + + // Minify the last (remainder) block, padded with spaces + uint8_t block[STEP_SIZE]; + size_t remaining_bytes = reader.get_remainder(block); + if (remaining_bytes > 0) { + // We do not want to write directly to the output stream. Rather, we write + // to a local buffer (for safety). + uint8_t out_block[STEP_SIZE]; + uint8_t * const guarded_dst{minifier.dst}; + minifier.dst = out_block; + minifier.step(block, reader); + size_t to_write = minifier.dst - out_block; + // In some cases, we could be enticed to consider the padded spaces + // as part of the string. This is fine as long as we do not write more + // than we consumed.
+ if(to_write > remaining_bytes) { to_write = remaining_bytes; } + memcpy(guarded_dst, out_block, to_write); + minifier.dst = guarded_dst + to_write; + } + return minifier.finish(dst, dst_len); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H +/* end file generic/stage1/json_minifier.h for icelake */ +/* including generic/stage1/json_structural_indexer.h for icelake: #include */ +/* begin file generic/stage1/json_structural_indexer.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgamation) + +namespace simdjson { +namespace icelake { +namespace { +namespace stage1 { + +class bit_indexer { +public: + uint32_t *tail; + + simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + +#if SIMDJSON_PREFER_REVERSE_BITS + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction.
+ * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. However, + * it tends to minimize the total number of instructions which is + * beneficial. + */ + simdjson_inline void write_index(uint32_t idx, uint64_t& rev_bits, int i) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } +#else + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. + */ + + simdjson_inline void write_index(uint32_t idx, uint64_t& bits, int i) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } +#endif // SIMDJSON_PREFER_REVERSE_BITS + + /* Writes one index into tail[START] and recurses (at compile time, while N > 1) with START+1 and N-1; returns START+N. */ + template + simdjson_inline int write_indexes(uint32_t idx, uint64_t& bits) { + write_index(idx, bits, START); + SIMDJSON_IF_CONSTEXPR (N > 1) { + write_indexes<(N-1>0?START+1:START), (N-1>=0?N-1:1)>(idx, bits); + } + return START+N; + } + + /* NOTE(review): the template parameter/argument lists in this hunk look truncated (bare `template`, `static_cast(idx)`, the recursive call below) - compare with the upstream file before applying. */ + template + simdjson_inline int write_indexes_stepped(uint32_t idx, uint64_t& bits, int cnt) { + write_indexes(idx, bits); + SIMDJSON_IF_CONSTEXPR ((START+STEP) < END) { + if (simdjson_unlikely((START+STEP) < cnt)) { + write_indexes_stepped<(START+STEP(idx, bits, cnt); + } + } + return ((END-START) % STEP) == 0 ?
END : (END-START) - ((END-START) % STEP) + STEP; + } + + // flatten out values in 'bits' assuming that they are to have values of idx + // plus their position in the bitvector, and store these indexes at + // base_ptr[base] incrementing base as we go + // will potentially store extra values beyond end of valid bits, so base_ptr + // needs to be large enough to handle this + // + // If the kernel sets SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER, then it + // will provide its own version of the code. +#ifdef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + simdjson_inline void write(uint32_t idx, uint64_t bits); +#else + simdjson_inline void write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; + + int cnt = static_cast(count_ones(bits)); + +#if SIMDJSON_PREFER_REVERSE_BITS + bits = reverse_bits(bits); +#endif +#ifdef SIMDJSON_STRUCTURAL_INDEXER_STEP + static constexpr const int STEP = SIMDJSON_STRUCTURAL_INDEXER_STEP; +#else + static constexpr const int STEP = 4; +#endif + static constexpr const int STEP_UNTIL = 24; + + write_indexes_stepped<0, STEP_UNTIL, STEP>(idx, bits, cnt); + SIMDJSON_IF_CONSTEXPR (STEP_UNTIL < 64) { + if (simdjson_unlikely(STEP_UNTIL < cnt)) { + for (int i=STEP_UNTIL; itail += cnt; + } +#endif // SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +}; + +class json_structural_indexer { +public: + /** + * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. + * + * @param partial Setting the partial parameter to true allows the find_structural_bits to + * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If + * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8.
+ */ + template + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; + +private: + simdjson_inline json_structural_indexer(uint32_t *structural_indexes); + template + simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + + json_scanner scanner{}; + utf8_checker checker{}; + bit_indexer indexer; + uint64_t prev_structurals = 0; + uint64_t unescaped_chars_error = 0; +}; + +simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} + +// Skip the last character if it is partial +simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { + if (simdjson_unlikely(len < 3)) { + switch (len) { + case 2: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + return len; + case 1: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + return len; + case 0: + return len; + } + } + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left + return len; +} + +// +// PERF NOTES: +// We pipe 2 inputs through these stages: +// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load +// 2 inputs' worth at once so that by the time step 2 is looking for the input, it's available. +// 2. Scan the JSON for critical data: strings, scalars and operators.
This is the critical path. +// The output of step 1 depends entirely on this information. These functions don't quite use +// up enough CPU: the second half of the functions is highly serial, only using 1 execution core +// at a time. The second input's scans have some dependency on the first ones finishing it, but +// they can make a lot of progress before they need that information. +// 3. Step 1 does not use enough capacity, so we run some extra stuff while we're waiting for that +// to finish: utf-8 checks and generating the output from the last iteration. +// +// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all +// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough +// workout. +// +template +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { + if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have a UTF-8 formatting error. + if(len == 0) { return UTF8_ERROR; } + } + buf_block_reader reader(buf, len); + json_structural_indexer indexer(parser.structural_indexes.get()); + + // Read all but the last block + while (reader.has_full_block()) { + indexer.step(reader.full_block(), reader); + } + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.)
+ uint8_t block[STEP_SIZE]; + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } + indexer.step(block, reader); + return indexer.finish(parser, reader.block_index(), len, partial); +} + +template<> +simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block); + simd::simd8x64 in_2(block+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1, reader.block_index()); + this->next(in_2, block_2, reader.block_index()+64); + reader.advance(); +} + +template<> +simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block); + json_block block_1 = scanner.next(in_1); + this->next(in_1, block_1, reader.block_index()); + reader.advance(); +} + +/* Processes one chunk: feeds the UTF-8 checker (when SIMDJSON_UTF8VALIDATION is set), writes out the structurals saved by the previous call, saves this chunk's structurals for the next call, and accumulates the bytes <= 0x1F that non_quote_inside_string() reports. */ +simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + uint64_t unescaped = in.lteq(0x1F); +#if SIMDJSON_UTF8VALIDATION + checker.check_next_input(in); +#endif + indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser + prev_structurals = block.structural_start(); + unescaped_chars_error |= block.non_quote_inside_string(unescaped); +} + +simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { + // Write out the final iteration's structurals + indexer.write(uint32_t(idx-64), prev_structurals); + error_code error = scanner.finish(); + // We deliberately break down the next expression so that it is + // human readable. + const bool should_we_exit = is_streaming(partial) ?
+ ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when streaming we tolerate UNCLOSED_STRING + : (error != SUCCESS); // when not streaming, we must have SUCCESS + const bool have_unclosed_string = (error == UNCLOSED_STRING); + if (simdjson_unlikely(should_we_exit)) { return error; } + + if (unescaped_chars_error) { + return UNESCAPED_CHARS; + } + parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); + /*** + * The On Demand API requires special padding. + * + * This is related to https://github.com/simdjson/simdjson/issues/906 + * Basically, we want to make sure that if the parsing continues beyond the last (valid) + * structural character, it quickly stops. + * Only three structural characters can be repeated without triggering an error in JSON: '[', ']' and '}'. + * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing + * continues, then it must be '[', ']' or '}'. + * Suppose it is ] or }. We backtrack to the first character, what could it be that would + * not trigger an error? It could be ] or } but no, because you can't start a document that way. + * It can't be a comma, a colon or any simple value. So the only way we could continue is + * if the repeated character is [. But if so, the document must start with [. But if the document + * starts with [, it should end with ]. If we enforce that rule, then we would get + * ][[ which is invalid.
+ * + * This is illustrated with the test array_iterate_unclosed_error() on the following input: + * R"({ "a": [,,)" + **/ + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final + parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); + parser.structural_indexes[parser.n_structural_indexes + 2] = 0; + parser.next_structural_index = 0; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + return EMPTY; + } + if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { + return UNEXPECTED_ERROR; + } + if (partial == stage1_mode::streaming_partial) { + // If we have an unclosed string, then the last structural + // will be the quote and we want to make sure to omit it. + if(have_unclosed_string) { + parser.n_structural_indexes--; + // The unclosed string's quote was the only structural, so nothing is left to parse. + // NOTE(review): CAPACITY (not EMPTY) is returned here - presumably because the string does not fit in this window; confirm. + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } + } + // We truncate the input to the end of the last complete document (or zero). + auto new_structural_indexes = find_next_document_index(parser); + if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } + + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero).
+ // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. + return EMPTY; + } + } + checker.check_eof(); + return checker.errors(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +// Clear CUSTOM_BIT_INDEXER so other implementations can set it if they need to. 
+#undef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H +/* end file generic/stage1/json_structural_indexer.h for icelake */ +/* including generic/stage1/utf8_validator.h for icelake: #include */ +/* begin file generic/stage1/utf8_validator.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { +namespace stage1 { + +/** + * Validates that the string is actual UTF-8. + * + * The input is fed to the checker in 64-byte blocks; the remainder is copied + * into a local 64-byte buffer and checked as a final block. Returns true when + * the checker reports error_code::SUCCESS after check_eof(). + */ +template +bool generic_validate_utf8(const uint8_t * input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return c.errors() == error_code::SUCCESS; +} + +/** + * Convenience overload for char buffers; forwards to the templated version above. + */ +bool generic_validate_utf8(const char * input, size_t length) { + return generic_validate_utf8(reinterpret_cast(input),length); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H +/* end file generic/stage1/utf8_validator.h for icelake */ +/* end file generic/stage1/amalgamated.h for icelake */ +/* including generic/stage2/amalgamated.h for icelake: #include */ +/* begin file generic/stage2/amalgamated.h for icelake */ +// Stuff other things depend on +/* including
generic/stage2/base.h for icelake: #include */ +/* begin file generic/stage2/base.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { +namespace stage2 { + +class json_iterator; +class structural_iterator; +struct tape_builder; +struct tape_writer; + +} // namespace stage2 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_BASE_H +/* end file generic/stage2/base.h for icelake */ +/* including generic/stage2/tape_writer.h for icelake: #include */ +/* begin file generic/stage2/tape_writer.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace icelake { +namespace { +namespace stage2 { + +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; + + /** Write a signed 64-bit value to tape. */ + simdjson_inline void append_s64(int64_t value) noexcept; + + /** Write an unsigned 64-bit value to tape. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + + /** Write a double value to tape. */ + simdjson_inline void append_double(double value) noexcept; + + /** + * Append a tape entry (an 8-bit type, and 56 bits worth of value).
+ */ + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; + + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_inline void skip() noexcept; + + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_inline void skip_large_integer() noexcept; + + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_inline void skip_double() noexcept; + + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. + */ + template + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct tape_writer + +/** Write a signed 64-bit value to tape: an INT64 entry followed by the value (via append2). */ +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} + +/** Write an unsigned 64-bit value to tape: a UINT64 entry, then the raw value in the next slot. */ +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} + +/** Write a double value to tape.
*/ +simdjson_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} + +simdjson_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} + +/* Advances by two slots, the same number append2() fills (type entry + 64-bit payload). */ +simdjson_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} + +/* Advances by two slots, the same number append2() fills (type entry + 64-bit payload). */ +simdjson_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} + +/* Stores val with the tape type in the top byte (shifted left by 56), then moves to the next slot. */ +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} + +/* Writes the type entry, then copies the raw 64 bits of val2 into the following slot. */ +template +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} + +/* Same packing as append(), but into a caller-supplied slot; next_tape_loc is not touched. */ +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} + +} // namespace stage2 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H +/* end file generic/stage2/tape_writer.h for icelake */ +/* including generic/stage2/logger.h for icelake: #include */ +/* begin file generic/stage2/logger.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + + +// This is for an internal-only stage 2 specific logger. +// Build with SIMDJSON_VERBOSE_LOGGING set (it drives LOG_ENABLED) to log what stage 2 is doing!
+namespace simdjson { +namespace icelake { +namespace { +namespace logger { + + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; + + static int log_depth; // Not threadsafe. Log only. + + // Helper to turn unprintable or newline characters into spaces + static simdjson_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } + } + + // Print the header and set up log_start + static simdjson_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } + } + + simdjson_unused static simdjson_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } + + // Logs a single line from the stage 2 DOM parser + template + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? 
nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i */ +/* begin file generic/stage2/json_iterator.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { +namespace stage2 { + +class json_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + uint32_t depth{0}; + + /** + * Walk the JSON document. + * + * The visitor receives callbacks when values are encountered. All callbacks pass the iterator as + * the first parameter; some callbacks have other parameters as well: + * + * - visit_document_start() - at the beginning. + * - visit_document_end() - at the end (if things were successful). + * + * - visit_array_start() - at the start `[` of a non-empty array. + * - visit_array_end() - at the end `]` of a non-empty array. + * - visit_empty_array() - when an empty array is encountered. + * + * - visit_object_end() - at the start `]` of a non-empty object. + * - visit_object_start() - at the end `]` of a non-empty object. + * - visit_empty_object() - when an empty object is encountered. 
+ * - visit_key(const uint8_t *key) - when a key in an object field is encountered. key is + * guaranteed to point at the first quote of the string (`"key"`). + * - visit_primitive(const uint8_t *value) - when a value is a string, number, boolean or null. + * - visit_root_primitive(iter, uint8_t *value) - when the top-level value is a string, number, boolean or null. + * + * - increment_count(iter) - each time a value is found in an array or object. + */ + template + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; + + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + + /** + * Look at the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; + + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. 
+ */ + simdjson_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_error(const char *error) const noexcept; + + template + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); + + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); + + // + // Read first value + // + { + auto value = advance(); + + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. 
See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } + } + goto document_end; + +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); + + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { 
log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } + +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; + +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); + +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + } + +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); + + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); + + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters 
at the end of the JSON!"); + return TAPE_ERROR; + } + + return SUCCESS; + +} // walk_document() + +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { +} + +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; +} +simdjson_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); +} + +simdjson_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { + return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} + +simdjson_inline void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} + +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} + +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} + +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, 
value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + // Use the fact that most scalars are going to be either strings or numbers. + if(*value == '"') { + return visitor.visit_string(*this, value); + } else if (((*value - '0') < 10) || (*value == '-')) { + return visitor.visit_number(*this, value); + } + // true, false, null are uncommon. + switch (*value) { + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + default: + log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } +} + +} // namespace stage2 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H +/* end file generic/stage2/json_iterator.h for icelake */ +/* including generic/stage2/stringparsing.h for icelake: #include */ +/* begin file generic/stage2/stringparsing.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation 
uses +// It is intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace icelake { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr, bool allow_replacement) { + // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) + constexpr uint32_t substitution_code_point = 0xfffd; + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this to inside the + 
// multilingual plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + + } + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. 
+ if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +// handle a unicode codepoint using the wobbly convention +// https://simonsapin.github.io/wtf-8/ +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // It is not ideal that this function is nearly identical to handle_unicode_codepoint. + // + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this to inside the + // multilingual plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + uint32_t low_bit = code_point_2 - 0xdc00; + if ((low_bit >> 10) == 0) { + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + } + } + + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. 
There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_ptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. 
*/ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } +} + +simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) { + // It is not ideal that this function is nearly identical to parse_string. + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint_wobbly(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. 
*/ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H +/* end file generic/stage2/stringparsing.h for icelake */ +/* including generic/stage2/structural_iterator.h for icelake: #include */ +/* begin file generic/stage2/structural_iterator.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { +namespace stage2 { + +class structural_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + + // Start a structural + simdjson_inline structural_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { + } + // Get the buffer position of the current structural character + simdjson_inline const uint8_t* current() { + return &buf[*(next_structural-1)]; + } + // Get the current structural character + simdjson_inline char current_char() { + return buf[*(next_structural-1)]; + } + // Get the next structural character without advancing + simdjson_inline char peek_next_char() { + return buf[*next_structural]; + } + simdjson_inline const uint8_t* peek() { + return &buf[*next_structural]; + } + simdjson_inline const uint8_t* advance() { + return &buf[*(next_structural++)]; + } + simdjson_inline char advance_char() { + 
return buf[*(next_structural++)]; + } + simdjson_inline size_t remaining_len() { + return dom_parser.len - *(next_structural-1); + } + + simdjson_inline bool at_end() { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; + } + simdjson_inline bool at_beginning() { + return next_structural == dom_parser.structural_indexes.get(); + } +}; + +} // namespace stage2 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H +/* end file generic/stage2/structural_iterator.h for icelake */ +/* including generic/stage2/tape_builder.h for icelake: #include */ +/* begin file generic/stage2/tape_builder.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace icelake { +namespace { +namespace stage2 { + +struct tape_builder { + template + simdjson_warn_unused static simdjson_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; + + /** Called when a non-empty document starts. */ + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. 
*/ + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; + + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. + */ + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; + + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. + * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. 
+ */ + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + /** Called each time a new field or element in an array or object is found. 
*/ + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; + + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; + + simdjson_inline tape_builder(dom::document &doc) noexcept; + + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; +}; // struct tape_builder + +template +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { + constexpr uint32_t start_tape_index = 0; + 
tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} + +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst, false); // We do not allow replacement when the escape characters are invalid. + if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; + } + on_end_string(dst); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { + // + // We need to make a copy to make sure that the string is space terminated. + // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. 
However, we have no control at this stage + // on how the padding was done. What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. + // + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code 
tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +// private: + +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} + +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. 
+} + +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? 
It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} + +} // namespace stage2 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H +/* end file generic/stage2/tape_builder.h for icelake */ +/* end file generic/stage2/amalgamated.h for icelake */ + +#undef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +// +// Stage 1 +// + +namespace simdjson { +namespace icelake { + +simdjson_warn_unused error_code implementation::create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr& dst +) const noexcept { + dst.reset( new (std::nothrow) dom_parser_implementation() ); + if (!dst) { return MEMALLOC; } + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; + return SUCCESS; +} + +namespace { + +using namespace simd; + +// This identifies structural characters (comma, colon, braces, brackets), +// and ASCII white-space ('\r','\n','\t',' '). +simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { + // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why + // we can't use the generic lookup_16. + const auto whitespace_table = simd8::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); + + // The 6 operators (:,[]{}) have these values: + // + // , 2C + // : 3A + // [ 5B + // { 7B + // ] 5D + // } 7D + // + // If you use | 0x20 to turn [ and ] into { and }, the lower 4 bits of each character is unique. + // We exploit this, using a simd 4-bit lookup to tell us which character match against, and then + // match it (against | 0x20). + // + // To prevent recognizing other characters, everything else gets compared with 0, which cannot + // match due to the | 0x20. 
+ // + // NOTE: Due to the | 0x20, this ALSO treats and (control characters 0C and 1A) like , + // and :. This gets caught in stage 2, which checks the actual character to ensure the right + // operators are in the right places. + const auto op_table = simd8::repeat_16( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, ':', '{', // : = 3A, [ = 5B, { = 7B + ',', '}', 0, 0 // , = 2C, ] = 5D, } = 7D + ); + + // We compute whitespace and op separately. If later code only uses one or the + // other, given the fact that all functions are aggressively inlined, we can + // hope that useless computations will be omitted. This is namely case when + // minifying (we only need whitespace). + + const uint64_t whitespace = in.eq({ + _mm512_shuffle_epi8(whitespace_table, in.chunks[0]) + }); + // Turn [ and ] into { and } + const simd8x64 curlified{ + in.chunks[0] | 0x20 + }; + const uint64_t op = curlified.eq({ + _mm512_shuffle_epi8(op_table, in.chunks[0]) + }); + + return { whitespace, op }; +} + +simdjson_inline bool is_ascii(const simd8x64& input) { + return input.reduce_or().is_ascii(); +} + +simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1.saturating_sub(0xc0u-1); // Only 11______ will be > 0 + simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. 
+ return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); +} + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0xe0u-0x80); // Only 111_____ will be >= 0x80 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-0x80); // Only 1111____ will be >= 0x80 + return is_third_byte | is_fourth_byte; +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +/** + * We provide a custom version of bit_indexer::write using + * naked intrinsics. + * TODO: make this code more elegant. + */ +// Under GCC 12, the intrinsic _mm512_extracti32x4_epi32 may generate 'maybe uninitialized'. +// as a workaround, we disable warnings within the following function. +SIMDJSON_PUSH_DISABLE_ALL_WARNINGS +namespace simdjson { namespace icelake { namespace { namespace stage1 { +simdjson_inline void bit_indexer::write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. 
+ if (bits == 0) { return; } + + const __m512i indexes = _mm512_maskz_compress_epi8(bits, _mm512_set_epi32( + 0x3f3e3d3c, 0x3b3a3938, 0x37363534, 0x33323130, + 0x2f2e2d2c, 0x2b2a2928, 0x27262524, 0x23222120, + 0x1f1e1d1c, 0x1b1a1918, 0x17161514, 0x13121110, + 0x0f0e0d0c, 0x0b0a0908, 0x07060504, 0x03020100 + )); + const __m512i start_index = _mm512_set1_epi32(idx); + + const auto count = count_ones(bits); + __m512i t0 = _mm512_cvtepu8_epi32(_mm512_castsi512_si128(indexes)); + _mm512_storeu_si512(this->tail, _mm512_add_epi32(t0, start_index)); + + if(count > 16) { + const __m512i t1 = _mm512_cvtepu8_epi32(_mm512_extracti32x4_epi32(indexes, 1)); + _mm512_storeu_si512(this->tail + 16, _mm512_add_epi32(t1, start_index)); + if(count > 32) { + const __m512i t2 = _mm512_cvtepu8_epi32(_mm512_extracti32x4_epi32(indexes, 2)); + _mm512_storeu_si512(this->tail + 32, _mm512_add_epi32(t2, start_index)); + if(count > 48) { + const __m512i t3 = _mm512_cvtepu8_epi32(_mm512_extracti32x4_epi32(indexes, 3)); + _mm512_storeu_si512(this->tail + 48, _mm512_add_epi32(t3, start_index)); + } + } + } + this->tail += count; +} +}}}} +SIMDJSON_POP_DISABLE_WARNINGS + +// +// Stage 2 +// + +// +// Implementation-specific overrides +// +namespace simdjson { +namespace icelake { + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + return icelake::stage1::json_minifier::minify<128>(buf, len, dst, dst_len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { + this->buf = _buf; + this->len = _len; + return icelake::stage1::json_structural_indexer::index<128>(_buf, _len, *this, streaming); +} + +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return icelake::stage1::generic_validate_utf8(buf,len); +} + +simdjson_warn_unused error_code 
dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst, bool replacement_char) const noexcept { + return icelake::stringparsing::parse_string(src, dst, replacement_char); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept { + return icelake::stringparsing::parse_wobbly_string(src, dst); +} + +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { return error; } + return stage2(_doc); +} + +} // namespace icelake +} // namespace simdjson + +/* including simdjson/icelake/end.h: #include */ +/* begin file simdjson/icelake/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +SIMDJSON_UNTARGET_REGION +#endif + +/* undefining SIMDJSON_IMPLEMENTATION from "icelake" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/icelake/end.h */ + +#endif // SIMDJSON_SRC_ICELAKE_CPP +/* end file icelake.cpp */ +#endif +#if SIMDJSON_IMPLEMENTATION_PPC64 +/* including ppc64.cpp: #include */ +/* begin file ppc64.cpp */ +#ifndef SIMDJSON_SRC_PPC64_CPP +#define SIMDJSON_SRC_PPC64_CPP + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // 
SIMDJSON_CONDITIONAL_INCLUDE */ + +/* including simdjson/ppc64.h: #include */ +/* begin file simdjson/ppc64.h */ +#ifndef SIMDJSON_PPC64_H +#define SIMDJSON_PPC64_H + +/* including simdjson/ppc64/begin.h: #include "simdjson/ppc64/begin.h" */ +/* begin file simdjson/ppc64/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "ppc64" */ +#define SIMDJSON_IMPLEMENTATION ppc64 +/* including simdjson/ppc64/base.h: #include "simdjson/ppc64/base.h" */ +/* begin file simdjson/ppc64/base.h */ +#ifndef SIMDJSON_PPC64_BASE_H +#define SIMDJSON_PPC64_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Implementation for ALTIVEC (PPC64). + */ +namespace ppc64 { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_BASE_H +/* end file simdjson/ppc64/base.h */ +/* including simdjson/ppc64/intrinsics.h: #include "simdjson/ppc64/intrinsics.h" */ +/* begin file simdjson/ppc64/intrinsics.h */ +#ifndef SIMDJSON_PPC64_INTRINSICS_H +#define SIMDJSON_PPC64_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +// These are defined by altivec.h in GCC toolchain, it is safe to undef them. 
+#ifdef bool +#undef bool +#endif + +#ifdef vector +#undef vector +#endif + +static_assert(sizeof(__vector unsigned char) <= simdjson::SIMDJSON_PADDING, "insufficient padding for ppc64"); + +#endif // SIMDJSON_PPC64_INTRINSICS_H +/* end file simdjson/ppc64/intrinsics.h */ +/* including simdjson/ppc64/bitmanipulation.h: #include "simdjson/ppc64/bitmanipulation.h" */ +/* begin file simdjson/ppc64/bitmanipulation.h */ +#ifndef SIMDJSON_PPC64_BITMANIPULATION_H +#define SIMDJSON_PPC64_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). 
+ _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num - 1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline int count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num); // Visual Studio wants two underscores +} +#else +simdjson_inline int count_ones(uint64_t input_num) { + return __builtin_popcountll(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + *result = value1 + value2; + return *result < value1; +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_BITMANIPULATION_H +/* end file simdjson/ppc64/bitmanipulation.h */ +/* including simdjson/ppc64/bitmask.h: #include "simdjson/ppc64/bitmask.h" */ +/* begin file simdjson/ppc64/bitmask.h */ +#ifndef SIMDJSON_PPC64_BITMASK_H +#define SIMDJSON_PPC64_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation 
skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is +// encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + // You can use the version below, however gcc sometimes miscompiles + // vec_pmsum_be, it happens somewhere around between 8 and 9th version. + // The performance boost was not noticeable, falling back to a usual + // implementation. + // __vector unsigned long long all_ones = {~0ull, ~0ull}; + // __vector unsigned long long mask = {bitmask, 0}; + // // Clang and GCC return different values for pmsum for ull so cast it to one. + // // Generally it is not specified by ALTIVEC ISA what is returned by + // // vec_pmsum_be. + // #if defined(__LITTLE_ENDIAN__) + // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[0]); + // #else + // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[1]); + // #endif + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif +/* end file simdjson/ppc64/bitmask.h */ +/* including simdjson/ppc64/numberparsing_defs.h: #include "simdjson/ppc64/numberparsing_defs.h" */ +/* begin file simdjson/ppc64/numberparsing_defs.h */ +#ifndef SIMDJSON_PPC64_NUMBERPARSING_DEFS_H +#define SIMDJSON_PPC64_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation 
skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +#if defined(__linux__) +#include +#elif defined(__FreeBSD__) +#include +#endif + +namespace simdjson { +namespace ppc64 { +namespace numberparsing { + +// we don't have appropriate instructions, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); +#ifdef __BIG_ENDIAN__ +#if defined(__linux__) + val = bswap_64(val); +#elif defined(__FreeBSD__) + val = bswap64(val); +#endif +#endif + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace ppc64 +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_PPC64_NUMBERPARSING_DEFS_H +/* end file simdjson/ppc64/numberparsing_defs.h */ +/* including simdjson/ppc64/simd.h: #include "simdjson/ppc64/simd.h" */ +/* begin file simdjson/ppc64/simd.h */ +#ifndef SIMDJSON_PPC64_SIMD_H +#define SIMDJSON_PPC64_SIMD_H + +/* amalgamation skipped 
(editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace ppc64 { +namespace { +namespace simd { + +using __m128i = __vector unsigned char; + +template struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i &() const { + return this->value; + } + simdjson_inline operator __m128i &() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { + return vec_or(this->value, (__m128i)other); + } + simdjson_inline Child operator&(const Child other) const { + return vec_and(this->value, (__m128i)other); + } + simdjson_inline Child operator^(const Child other) const { + return vec_xor(this->value, (__m128i)other); + } + simdjson_inline Child bit_andnot(const Child other) const { + return vec_andc(this->value, (__m128i)other); + } + simdjson_inline Child &operator|=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast | other; + return *this_cast; + } + simdjson_inline Child &operator&=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast & other; + return *this_cast; + } + simdjson_inline Child &operator^=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast ^ other; + return *this_cast; + } +}; + +template > +struct base8 : base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + 
simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { + return (__m128i)vec_cmpeq(lhs.value, (__m128i)rhs); + } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(simd8 prev_chunk) const { + __m128i chunk = this->value; +#ifdef __LITTLE_ENDIAN__ + chunk = (__m128i)vec_reve(this->value); + prev_chunk = (__m128i)vec_reve((__m128i)prev_chunk); +#endif + chunk = (__m128i)vec_sld((__m128i)prev_chunk, (__m128i)chunk, 16 - N); +#ifdef __LITTLE_ENDIAN__ + chunk = (__m128i)vec_reve((__m128i)chunk); +#endif + return chunk; + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd8 : base8 { + static simdjson_inline simd8 splat(bool _value) { + return (__m128i)vec_splats((unsigned char)(-(!!_value))); + } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) + : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) + : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { + __vector unsigned long long result; + const __m128i perm_mask = {0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40, + 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00}; + + result = ((__vector unsigned long long)vec_vbpermq((__m128i)this->value, + (__m128i)perm_mask)); +#ifdef __LITTLE_ENDIAN__ + return static_cast(result[1]); +#else + return static_cast(result[0]); +#endif + } + simdjson_inline bool any() const { + return !vec_all_eq(this->value, (__m128i)vec_splats(0)); + } + simdjson_inline simd8 operator~() const { + return this->value ^ (__m128i)splat(true); + } +}; + +template struct base8_numeric : base8 { + static simdjson_inline simd8 splat(T value) { + (void)value; + return (__m128i)vec_splats(value); + } + static simdjson_inline simd8 zero() { return splat(0); } + static simdjson_inline simd8 load(const T values[16]) { + return (__m128i)(vec_vsx_ld(0, 
reinterpret_cast(values))); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, + T v5, T v6, T v7, T v8, T v9, + T v10, T v11, T v12, T v13, + T v14, T v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) + : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { + vec_vsx_st(this->value, 0, reinterpret_cast<__m128i *>(dst)); + } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { + return (__m128i)((__m128i)this->value + (__m128i)other); + } + simdjson_inline simd8 operator-(const simd8 other) const { + return (__m128i)((__m128i)this->value - (__m128i)other); + } + simdjson_inline simd8 &operator+=(const simd8 other) { + *this = *this + other; + return *static_cast *>(this); + } + simdjson_inline simd8 &operator-=(const simd8 other) { + *this = *this - other; + return *static_cast *>(this); + } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior + // for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return (__m128i)vec_perm((__m128i)lookup_table, (__m128i)lookup_table, this->value); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted + // as a bitset). Passing a 0 value for mask would be equivalent to writing out + // every byte to output. Only the first 16 - count_ones(mask) bytes of the + // result are significant but 16 bytes get written. 
Design consideration: it + // seems like a function with the signature simd8 compress(uint32_t mask) + // would be sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L *output) const { + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + using internal::thintable_epi8; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. +#ifdef __LITTLE_ENDIAN__ + __m128i shufmask = (__m128i)(__vector unsigned long long){ + thintable_epi8[mask1], thintable_epi8[mask2]}; +#else + __m128i shufmask = (__m128i)(__vector unsigned long long){ + thintable_epi8[mask2], thintable_epi8[mask1]}; + shufmask = (__m128i)vec_reve((__m128i)shufmask); +#endif + // we increment by 0x08 the second half of the mask + shufmask = ((__m128i)shufmask) + + ((__m128i)(__vector int){0, 0, 0x08080808, 0x08080808}); + + // this is the version "nearly pruned" + __m128i pruned = vec_perm(this->value, this->value, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. 
+ __m128i compactmask = + vec_vsx_ld(0, reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = vec_perm(pruned, (__m128i)vec_splats(0), compactmask); + vec_vsx_st(answer, 0, reinterpret_cast<__m128i *>(output)); + } + + template + simdjson_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); + } +}; + +// Signed bytes +template <> struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, + int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) + : simd8((__m128i)(__vector signed char){v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, + v15}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 + repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, + int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Order-sensitive comparisons + simdjson_inline simd8 + max_val(const simd8 other) const { + return (__m128i)vec_max((__vector signed 
char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + min_val(const simd8 other) const { + return (__m128i)vec_min((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + operator>(const simd8 other) const { + return (__m128i)vec_cmpgt((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + operator<(const simd8 other) const { + return (__m128i)vec_cmplt((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } +}; + +// Unsigned bytes +template <> struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) + : simd8((__m128i){v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 + repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, + uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, + uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, + uint8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Saturated math + simdjson_inline simd8 + saturating_add(const simd8 other) const { + return (__m128i)vec_adds(this->value, (__m128i)other); + } + simdjson_inline simd8 + saturating_sub(const simd8 other) const { + return (__m128i)vec_subs(this->value, 
(__m128i)other); + } + + // Order-specific operations + simdjson_inline simd8 + max_val(const simd8 other) const { + return (__m128i)vec_max(this->value, (__m128i)other); + } + simdjson_inline simd8 + min_val(const simd8 other) const { + return (__m128i)vec_min(this->value, (__m128i)other); + } + // Same as >, but only guarantees true is nonzero (> guarantees true = -1) + simdjson_inline simd8 + gt_bits(const simd8 other) const { + return this->saturating_sub(other); + } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 + lt_bits(const simd8 other) const { + return other.saturating_sub(*this); + } + simdjson_inline simd8 + operator<=(const simd8 other) const { + return other.max_val(*this) == other; + } + simdjson_inline simd8 + operator>=(const simd8 other) const { + return other.min_val(*this) == other; + } + simdjson_inline simd8 + operator>(const simd8 other) const { + return this->gt_bits(other).any_bits_set(); + } + simdjson_inline simd8 + operator<(const simd8 other) const { + return this->lt_bits(other).any_bits_set(); // lt_bits is nonzero exactly in lanes where *this < other + } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { + return (__m128i)vec_cmpeq(this->value, (__m128i)vec_splats(uint8_t(0))); + } + simdjson_inline simd8 bits_not_set(simd8 bits) const { + return (*this & bits).bits_not_set(); + } + simdjson_inline simd8 any_bits_set() const { + return ~this->bits_not_set(); + } + simdjson_inline simd8 any_bits_set(simd8 bits) const { + return ~this->bits_not_set(bits); + } + simdjson_inline bool bits_not_set_anywhere() const { + return vec_all_eq(this->value, (__m128i)vec_splats(0)); + } + simdjson_inline bool any_bits_set_anywhere() const { + return !bits_not_set_anywhere(); + } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { + return vec_all_eq(vec_and(this->value, (__m128i)bits), + (__m128i)vec_splats(0)); + } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { + return
!bits_not_set_anywhere(bits); + } + template simdjson_inline simd8 shr() const { + return simd8( + (__m128i)vec_sr(this->value, (__m128i)vec_splat_u8(N))); + } + template simdjson_inline simd8 shl() const { + return simd8( + (__m128i)vec_sl(this->value, (__m128i)vec_splat_u8(N))); + } +}; + +template struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, + "PPC64 kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64 &o) = delete; // no copy allowed + simd8x64 & + operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, + const simd8 chunk2, const simd8 chunk3) + : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) + : chunks{simd8::load(ptr), simd8::load(ptr + 16), + simd8::load(ptr + 32), simd8::load(ptr + 48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr + sizeof(simd8) * 0); + this->chunks[1].store(ptr + sizeof(simd8) * 1); + this->chunks[2].store(ptr + sizeof(simd8) * 2); + this->chunks[3].store(ptr + sizeof(simd8) * 3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T *output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), + output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), + output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), + output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r1 = 
this->chunks[1].to_bitmask(); + uint64_t r2 = this->chunks[2].to_bitmask(); + uint64_t r3 = this->chunks[3].to_bitmask(); + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, + this->chunks[2] == mask, this->chunks[3] == mask) + .to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64(this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3]) + .to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, + this->chunks[2] <= mask, this->chunks[3] <= mask) + .to_bitmask(); + } +}; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_SIMD_INPUT_H +/* end file simdjson/ppc64/simd.h */ +/* including simdjson/ppc64/stringparsing_defs.h: #include "simdjson/ppc64/stringparsing_defs.h" */ +/* begin file simdjson/ppc64/stringparsing_defs.h */ +#ifndef SIMDJSON_PPC64_STRINGPARSING_DEFS_H +#define SIMDJSON_PPC64_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/simd.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote + copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { + return ((bs_bits - 1) & quote_bits) != 0; + } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { + return trailing_zeroes(quote_bits); + } + simdjson_inline int backslash_index() { + return trailing_zeroes(bs_bits); + } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote +backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), + "backslash and quote finder must process fewer than " + "SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on + // PPC; therefore, we smash them together into a 64-byte mask and get the + // bitmask from there. 
+ uint64_t bs_and_quote = + simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_STRINGPARSING_DEFS_H +/* end file simdjson/ppc64/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/ppc64/begin.h */ +/* including simdjson/generic/amalgamated.h for ppc64: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for ppc64 */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for ppc64: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. 
*/ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." 
*/ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word +}; + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for ppc64 */ +/* including simdjson/generic/jsoncharutils.h for ppc64: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// 
otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. 
+ return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for ppc64 */ +/* including simdjson/generic/atomparsing.h for ppc64: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace ppc64 { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. 
+// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len 
> 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for ppc64 */ +/* including simdjson/generic/dom_parser_implementation.h for ppc64: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { + +// expectation: sizeof(open_container) = 64/8. 
+struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace ppc64 +} // namespace 
simdjson + +namespace simdjson { +namespace ppc64 { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for ppc64 */ +/* including simdjson/generic/implementation_simdjson_result_base.h for ppc64: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* 
amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). 
+ */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only if + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only if + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only if + * the error() method returns a value that evaluates to false.
+ */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for ppc64 */ +/* including simdjson/generic/numberparsing.h for ppc64: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace ppc64 { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define 
WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. 
+ // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. 
+ // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. 
+ simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. 
+ // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without + // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product + // is sufficiently accurate, and more computation is not needed. + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. 
+ // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. 
+ if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // 
might overflow, we will handle the overflow later + return true; +} + +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. 
+ + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? 
-exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { + if (parse_float_fallback(src, answer)) { + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! 
+ if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer + // reference to it, it would force it to be stored in memory, preventing the compiler from + // picking it apart and putting into registers. i.e. if we pass it as reference, + // it gets slow. + double d; + error_code error = slow_float_parsing(src, &d); + writer.append_double(d); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. 
+ return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; 
} +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' 
== *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. 
+ // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it does not fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. 
+ if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. 
+ // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. 
+ if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. 
+ if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } + // If the number is negative and valid, it must be a signed integer. + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). 
+ static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 (stopping at p so we never read at src_end) and see if the remaining significant digits still overflow + const uint8_t *start_digits = src + 2; + while ((start_digits != p) && (*start_digits == '0')) { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. 
+ // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for ppc64 */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for ppc64: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // 
SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} 
+template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for ppc64 */ +/* end file simdjson/generic/amalgamated.h for ppc64 */ +/* including simdjson/ppc64/end.h: #include "simdjson/ppc64/end.h" */ +/* begin file simdjson/ppc64/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "ppc64" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/ppc64/end.h */ + +#endif // SIMDJSON_PPC64_H +/* end file simdjson/ppc64.h */ +/* including simdjson/ppc64/implementation.h: #include */ +/* begin file simdjson/ppc64/implementation.h */ +#ifndef SIMDJSON_PPC64_IMPLEMENTATION_H +#define SIMDJSON_PPC64_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +/** + * Implementation for ALTIVEC (PPC64). 
+ */ +namespace ppc64 { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() + : simdjson::implementation("ppc64", "PPC64 ALTIVEC", + internal::instruction_set::ALTIVEC) {} + + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, size_t max_length, + std::unique_ptr &dst) + const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, + uint8_t *dst, + size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, + size_t len) const noexcept final; +}; + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_IMPLEMENTATION_H +/* end file simdjson/ppc64/implementation.h */ + +/* including simdjson/ppc64/begin.h: #include */ +/* begin file simdjson/ppc64/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "ppc64" */ +#define SIMDJSON_IMPLEMENTATION ppc64 +/* including simdjson/ppc64/base.h: #include "simdjson/ppc64/base.h" */ +/* begin file simdjson/ppc64/base.h */ +#ifndef SIMDJSON_PPC64_BASE_H +#define SIMDJSON_PPC64_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Implementation for ALTIVEC (PPC64). 
+ */ +namespace ppc64 { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_BASE_H +/* end file simdjson/ppc64/base.h */ +/* including simdjson/ppc64/intrinsics.h: #include "simdjson/ppc64/intrinsics.h" */ +/* begin file simdjson/ppc64/intrinsics.h */ +#ifndef SIMDJSON_PPC64_INTRINSICS_H +#define SIMDJSON_PPC64_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +// These are defined by altivec.h in GCC toolchain, it is safe to undef them. +#ifdef bool +#undef bool +#endif + +#ifdef vector +#undef vector +#endif + +static_assert(sizeof(__vector unsigned char) <= simdjson::SIMDJSON_PADDING, "insufficient padding for ppc64"); + +#endif // SIMDJSON_PPC64_INTRINSICS_H +/* end file simdjson/ppc64/intrinsics.h */ +/* including simdjson/ppc64/bitmanipulation.h: #include "simdjson/ppc64/bitmanipulation.h" */ +/* begin file simdjson/ppc64/bitmanipulation.h */ +#ifndef SIMDJSON_PPC64_BITMANIPULATION_H +#define SIMDJSON_PPC64_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. 
+SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num - 1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). 
+ if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline int count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num); // Visual Studio wants two underscores +} +#else +simdjson_inline int count_ones(uint64_t input_num) { + return __builtin_popcountll(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + *result = value1 + value2; + return *result < value1; +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_BITMANIPULATION_H +/* end file simdjson/ppc64/bitmanipulation.h */ +/* including simdjson/ppc64/bitmask.h: #include "simdjson/ppc64/bitmask.h" */ +/* begin file simdjson/ppc64/bitmask.h */ +#ifndef SIMDJSON_PPC64_BITMASK_H +#define SIMDJSON_PPC64_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is +// encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + // You can use the version below, however gcc sometimes miscompiles + // vec_pmsum_be, it happens somewhere around between 8 and 9th version. + // The performance boost was not noticeable, falling back to a usual + // implementation. 
+ // __vector unsigned long long all_ones = {~0ull, ~0ull}; + // __vector unsigned long long mask = {bitmask, 0}; + // // Clang and GCC return different values for pmsum for ull so cast it to one. + // // Generally it is not specified by ALTIVEC ISA what is returned by + // // vec_pmsum_be. + // #if defined(__LITTLE_ENDIAN__) + // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[0]); + // #else + // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[1]); + // #endif + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif +/* end file simdjson/ppc64/bitmask.h */ +/* including simdjson/ppc64/numberparsing_defs.h: #include "simdjson/ppc64/numberparsing_defs.h" */ +/* begin file simdjson/ppc64/numberparsing_defs.h */ +#ifndef SIMDJSON_PPC64_NUMBERPARSING_DEFS_H +#define SIMDJSON_PPC64_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +#if defined(__linux__) +#include +#elif defined(__FreeBSD__) +#include +#endif + +namespace simdjson { +namespace ppc64 { +namespace numberparsing { + +// we don't have appropriate instructions, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); +#ifdef __BIG_ENDIAN__ 
+#if defined(__linux__) + val = bswap_64(val); +#elif defined(__FreeBSD__) + val = bswap64(val); +#endif +#endif + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +/** @private Full 64x64-bit product; answer.low holds the low 64 bits and answer.high the high 64 bits. */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emulate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace ppc64 +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_PPC64_NUMBERPARSING_DEFS_H +/* end file simdjson/ppc64/numberparsing_defs.h */ +/* including simdjson/ppc64/simd.h: #include "simdjson/ppc64/simd.h" */ +/* begin file simdjson/ppc64/simd.h */ +#ifndef SIMDJSON_PPC64_SIMD_H +#define SIMDJSON_PPC64_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace ppc64 { +namespace { +namespace simd { + +using __m128i = __vector unsigned char; + +template struct base { + __m128i value; + + // Zero 
constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i &() const { + return this->value; + } + simdjson_inline operator __m128i &() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { + return vec_or(this->value, (__m128i)other); + } + simdjson_inline Child operator&(const Child other) const { + return vec_and(this->value, (__m128i)other); + } + simdjson_inline Child operator^(const Child other) const { + return vec_xor(this->value, (__m128i)other); + } + simdjson_inline Child bit_andnot(const Child other) const { + return vec_andc(this->value, (__m128i)other); + } + simdjson_inline Child &operator|=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast | other; + return *this_cast; + } + simdjson_inline Child &operator&=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast & other; + return *this_cast; + } + simdjson_inline Child &operator^=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast ^ other; + return *this_cast; + } +}; + +template > +struct base8 : base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { + return (__m128i)vec_cmpeq(lhs.value, (__m128i)rhs); + } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(simd8 prev_chunk) const { + __m128i chunk = this->value; +#ifdef __LITTLE_ENDIAN__ + chunk = (__m128i)vec_reve(this->value); + prev_chunk = (__m128i)vec_reve((__m128i)prev_chunk); +#endif + chunk = (__m128i)vec_sld((__m128i)prev_chunk, (__m128i)chunk, 16 - N); +#ifdef __LITTLE_ENDIAN__ + 
chunk = (__m128i)vec_reve((__m128i)chunk); +#endif + return chunk; + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd8 : base8 { + static simdjson_inline simd8 splat(bool _value) { + return (__m128i)vec_splats((unsigned char)(-(!!_value))); + } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) + : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) + : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { + __vector unsigned long long result; + const __m128i perm_mask = {0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40, + 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00}; + + result = ((__vector unsigned long long)vec_vbpermq((__m128i)this->value, + (__m128i)perm_mask)); +#ifdef __LITTLE_ENDIAN__ + return static_cast(result[1]); +#else + return static_cast(result[0]); +#endif + } + simdjson_inline bool any() const { + return !vec_all_eq(this->value, (__m128i)vec_splats(0)); + } + simdjson_inline simd8 operator~() const { + return this->value ^ (__m128i)splat(true); + } +}; + +template struct base8_numeric : base8 { + static simdjson_inline simd8 splat(T value) { + (void)value; + return (__m128i)vec_splats(value); + } + static simdjson_inline simd8 zero() { return splat(0); } + static simdjson_inline simd8 load(const T values[16]) { + return (__m128i)(vec_vsx_ld(0, reinterpret_cast(values))); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, + T v5, T v6, T v7, T v8, T v9, + T v10, T v11, T v12, T v13, + T v14, T v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) + : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { + vec_vsx_st(this->value, 0, 
reinterpret_cast<__m128i *>(dst)); + } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { + return (__m128i)((__m128i)this->value + (__m128i)other); + } + simdjson_inline simd8 operator-(const simd8 other) const { + return (__m128i)((__m128i)this->value - (__m128i)other); + } + simdjson_inline simd8 &operator+=(const simd8 other) { + *this = *this + other; + return *static_cast *>(this); + } + simdjson_inline simd8 &operator-=(const simd8 other) { + *this = *this - other; + return *static_cast *>(this); + } + + // Perform a lookup assuming the value is between 0 and 15 (undefined behavior + // for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return (__m128i)vec_perm((__m128i)lookup_table, (__m128i)lookup_table, this->value); + } + + // Copies to 'output' all bytes corresponding to a 0 in the mask (interpreted + // as a bitset). Passing a 0 value for mask would be equivalent to writing out + // every byte to output. Only the first 16 - count_ones(mask) bytes of the + // result are significant but 16 bytes get written. Design consideration: it + // seems like a function with the signature simd8 compress(uint32_t mask) + // would be sensible, but the AVX ISA makes this kind of approach difficult. 
+ template + simdjson_inline void compress(uint16_t mask, L *output) const { + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + using internal::thintable_epi8; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. +#ifdef __LITTLE_ENDIAN__ + __m128i shufmask = (__m128i)(__vector unsigned long long){ + thintable_epi8[mask1], thintable_epi8[mask2]}; +#else + __m128i shufmask = (__m128i)(__vector unsigned long long){ + thintable_epi8[mask2], thintable_epi8[mask1]}; + shufmask = (__m128i)vec_reve((__m128i)shufmask); +#endif + // we increment by 0x08 the second half of the mask + shufmask = ((__m128i)shufmask) + + ((__m128i)(__vector int){0, 0, 0x08080808, 0x08080808}); + + // this is the version "nearly pruned" + __m128i pruned = vec_perm(this->value, this->value, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. 
+ __m128i compactmask = + vec_vsx_ld(0, reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = vec_perm(pruned, (__m128i)vec_splats(0), compactmask); + vec_vsx_st(answer, 0, reinterpret_cast<__m128i *>(output)); + } + + template + simdjson_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); + } +}; + +// Signed bytes +template <> struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, + int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) + : simd8((__m128i)(__vector signed char){v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, + v15}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 + repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, + int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Order-sensitive comparisons + simdjson_inline simd8 + max_val(const simd8 other) const { + return (__m128i)vec_max((__vector signed 
char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + min_val(const simd8 other) const { + return (__m128i)vec_min((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + operator>(const simd8 other) const { + return (__m128i)vec_cmpgt((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + operator<(const simd8 other) const { + return (__m128i)vec_cmplt((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } +}; + +// Unsigned bytes +template <> struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) + : simd8((__m128i){v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 + repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, + uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, + uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, + uint8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Saturated math + simdjson_inline simd8 + saturating_add(const simd8 other) const { + return (__m128i)vec_adds(this->value, (__m128i)other); + } + simdjson_inline simd8 + saturating_sub(const simd8 other) const { + return (__m128i)vec_subs(this->value, 
(__m128i)other); + } + + // Order-specific operations + simdjson_inline simd8 + max_val(const simd8 other) const { + return (__m128i)vec_max(this->value, (__m128i)other); + } + simdjson_inline simd8 + min_val(const simd8 other) const { + return (__m128i)vec_min(this->value, (__m128i)other); + } + // Same as >, but only guarantees true is nonzero (> guarantees true = -1) + simdjson_inline simd8 + gt_bits(const simd8 other) const { + return this->saturating_sub(other); + } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 + lt_bits(const simd8 other) const { + return other.saturating_sub(*this); + } + /* this <= other exactly in the lanes where max(this, other) == other */ simdjson_inline simd8 + operator<=(const simd8 other) const { + return other.max_val(*this) == other; + } + /* this >= other exactly in the lanes where min(this, other) == other */ simdjson_inline simd8 + operator>=(const simd8 other) const { + return other.min_val(*this) == other; + } + /* unsigned greater-than: lanes where (this - other) saturates to a nonzero value */ simdjson_inline simd8 + operator>(const simd8 other) const { + return this->gt_bits(other).any_bits_set(); + } + /* unsigned less-than: lanes where (other - this) saturates to a nonzero value; this must go through lt_bits, since gt_bits would make it a duplicate of operator> */ simdjson_inline simd8 + operator<(const simd8 other) const { + return this->lt_bits(other).any_bits_set(); + } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { + return (__m128i)vec_cmpeq(this->value, (__m128i)vec_splats(uint8_t(0))); + } + simdjson_inline simd8 bits_not_set(simd8 bits) const { + return (*this & bits).bits_not_set(); + } + simdjson_inline simd8 any_bits_set() const { + return ~this->bits_not_set(); + } + simdjson_inline simd8 any_bits_set(simd8 bits) const { + return ~this->bits_not_set(bits); + } + simdjson_inline bool bits_not_set_anywhere() const { + return vec_all_eq(this->value, (__m128i)vec_splats(0)); + } + simdjson_inline bool any_bits_set_anywhere() const { + return !bits_not_set_anywhere(); + } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { + return vec_all_eq(vec_and(this->value, (__m128i)bits), + (__m128i)vec_splats(0)); + } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { + return 
!bits_not_set_anywhere(bits); + } + template simdjson_inline simd8 shr() const { + return simd8( + (__m128i)vec_sr(this->value, (__m128i)vec_splat_u8(N))); + } + template simdjson_inline simd8 shl() const { + return simd8( + (__m128i)vec_sl(this->value, (__m128i)vec_splat_u8(N))); + } +}; + +template struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, + "PPC64 kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64 &o) = delete; // no copy allowed + simd8x64 & + operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, + const simd8 chunk2, const simd8 chunk3) + : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) + : chunks{simd8::load(ptr), simd8::load(ptr + 16), + simd8::load(ptr + 32), simd8::load(ptr + 48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr + sizeof(simd8) * 0); + this->chunks[1].store(ptr + sizeof(simd8) * 1); + this->chunks[2].store(ptr + sizeof(simd8) * 2); + this->chunks[3].store(ptr + sizeof(simd8) * 3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T *output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), + output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), + output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), + output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r1 = 
this->chunks[1].to_bitmask(); + uint64_t r2 = this->chunks[2].to_bitmask(); + uint64_t r3 = this->chunks[3].to_bitmask(); + /* chunk k contributes bits 16k..16k+15 - assumes each per-chunk mask fits in 16 bits */ return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + /* 64-bit mask of the lanes equal to m, built from the four per-chunk comparisons */ simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, + this->chunks[2] == mask, this->chunks[3] == mask) + .to_bitmask(); + } + + /* 64-bit mask of the lanes that are equal between this block and other */ simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64(this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3]) + .to_bitmask(); + } + + /* 64-bit mask of the lanes that compare <= m */ simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, + this->chunks[2] <= mask, this->chunks[3] <= mask) + .to_bitmask(); + } +}; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_SIMD_H +/* end file simdjson/ppc64/simd.h */ +/* including simdjson/ppc64/stringparsing_defs.h: #include "simdjson/ppc64/stringparsing_defs.h" */ +/* begin file simdjson/ppc64/stringparsing_defs.h */ +#ifndef SIMDJSON_PPC64_STRINGPARSING_DEFS_H +#define SIMDJSON_PPC64_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/simd.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
+struct backslash_and_quote { +public: + /* input bytes handled per copy_and_find call: two vector loads (v0, v1) */ static constexpr uint32_t BYTES_PROCESSED = 32; + /* copies the block from src to dst and records where backslashes and quotes occur in it */ simdjson_inline static backslash_and_quote + copy_and_find(const uint8_t *src, uint8_t *dst); + + /* true when a quote bit sits below the lowest backslash bit; with no backslash, (bs_bits - 1) wraps to all ones so any quote counts */ simdjson_inline bool has_quote_first() { + return ((bs_bits - 1) & quote_bits) != 0; + } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + /* position of the lowest set bit in quote_bits; do not rely on the value when quote_bits is zero */ simdjson_inline int quote_index() { + return trailing_zeroes(quote_bits); + } + /* position of the lowest set bit in bs_bits; do not rely on the value when bs_bits is zero */ simdjson_inline int backslash_index() { + return trailing_zeroes(bs_bits); + } + + /* low 32 bits of the combined mask: the two backslash comparisons (v0, then v1) */ uint32_t bs_bits; + /* high 32 bits of the combined mask: the two double-quote comparisons (v0, then v1) */ uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote +backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), + "backslash and quote finder must process fewer than " + "SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on + // PPC; therefore, we smash them together into a 64-byte mask and get the + // bitmask from there. + uint64_t bs_and_quote = + simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_STRINGPARSING_DEFS_H +/* end file simdjson/ppc64/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/ppc64/begin.h */ +/* including generic/amalgamated.h for ppc64: #include */ +/* begin file generic/amalgamated.h for ppc64 */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_SRC_GENERIC_DEPENDENCIES_H) +#error generic/dependencies.h must be included before generic/amalgamated.h! 
+#endif + +/* including generic/base.h for ppc64: #include */ +/* begin file generic/base.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +struct json_character_block; + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_BASE_H +/* end file generic/base.h for ppc64 */ +/* including generic/dom_parser_implementation.h for ppc64: #include */ +/* begin file generic/dom_parser_implementation.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// Interface a dom parser implementation must fulfill +namespace simdjson { +namespace ppc64 { +namespace { + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3); +simdjson_inline bool is_ascii(const simd8x64& input); + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file generic/dom_parser_implementation.h for ppc64 */ +/* including generic/json_character_block.h for ppc64: #include */ +/* begin file generic/json_character_block.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H + +/* amalgamation skipped (editor-only): #ifndef 
SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +struct json_character_block { + static simdjson_inline json_character_block classify(const simd::simd8x64& in); + + simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; } + simdjson_inline uint64_t op() const noexcept { return _op; } + simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } + + uint64_t _whitespace; + uint64_t _op; +}; + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H +/* end file generic/json_character_block.h for ppc64 */ +/* end file generic/amalgamated.h for ppc64 */ +/* including generic/stage1/amalgamated.h for ppc64: #include */ +/* begin file generic/stage1/amalgamated.h for ppc64 */ +// Stuff other things depend on +/* including generic/stage1/base.h for ppc64: #include */ +/* begin file generic/stage1/base.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +class bit_indexer; +template +struct buf_block_reader; +struct json_block; +class json_minifier; +class json_scanner; +struct json_string_block; +class json_string_scanner; +class json_structural_indexer; + +} // namespace stage1 + +namespace utf8_validation { +struct utf8_checker; +} // namespace utf8_validation + +using utf8_validation::utf8_checker; + +} // 
unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BASE_H +/* end file generic/stage1/base.h for ppc64 */ +/* including generic/stage1/buf_block_reader.h for ppc64: #include */ +/* begin file generic/stage1/buf_block_reader.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +// Walks through a buffer in block-sized increments, loading the last part with spaces +template +struct buf_block_reader { +public: + simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this + * function returns 0 and leaves dst untouched). In particular, if len == STEP_SIZE there + * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. + */ + simdjson_inline size_t get_remainder(uint8_t *dst) const; + simdjson_inline void advance(); +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text_64(const uint8_t *text) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i); i++) { + buf[i] = int8_t(text[i]) < ' ' ? 
'_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text(const simd8x64& in) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] < ' ') { buf[i] = '_'; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_input_text(const simd8x64& in, uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] <= ' ') { buf[i] = '_'; } + if (!(mask & (size_t(1) << i))) { buf[i] = ' '; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_mask(uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i<64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} + +template +simdjson_inline size_t buf_block_reader::block_index() { return idx; } + +template +simdjson_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} + +template +simdjson_inline const uint8_t *buf_block_reader::full_block() const { + return &buf[idx]; +} + +template +simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { + if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. 
+ std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} + +template +simdjson_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H +/* end file generic/stage1/buf_block_reader.h for ppc64 */ +/* including generic/stage1/json_escape_scanner.h for ppc64: #include */ +/* begin file generic/stage1/json_escape_scanner.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +/** + * Scans for escape characters in JSON, taking care with multiple backslashes (\\n vs. \n). + */ +struct json_escape_scanner { + /** The actual escape characters (the backslashes themselves). */ + uint64_t next_is_escaped = 0ULL; + + struct escaped_and_escape { + /** + * Mask of escaped characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 0100100010100101000 + * n \ \ n \ \ + * ``` + */ + uint64_t escaped; + /** + * Mask of escape characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 1001000101001010001 + * \ \ \ \ \ \ \ + * ``` + */ + uint64_t escape; + }; + + /** + * Get a mask of both escape and escaped characters (the characters following a backslash). + * + * @param potential_escape A mask of the character that can escape others (but could be + * escaped itself). e.g. 
block.eq('\\') + */ + simdjson_really_inline escaped_and_escape next(uint64_t backslash) noexcept { + +#if !SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT + if (!backslash) { return {next_escaped_without_backslashes(), 0}; } +#endif + + // | | Mask (shows characters instead of 1's) | Depth | Instructions | + // |--------------------------------|----------------------------------------|-------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | | | + // | | ` even odd even odd odd` | | | + // | potential_escape | ` \ \\\ \\\ \\\\ \\\\ \\\` | 1 | 1 (backslash & ~first_is_escaped) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 5 | 5 (next_escape_and_terminal_code()) + // | escaped | `\ \ n \ n \ \ \ \ \ ` X | 6 | 7 (escape_and_terminal_code ^ (potential_escape | first_is_escaped)) + // | escape | ` \ \ \ \ \ \ \ \ \ \` | 6 | 8 (escape_and_terminal_code & backslash) + // | first_is_escaped | `\ ` | 7 (*) | 9 (escape >> 63) () + // (*) this is not needed until the next iteration + uint64_t escape_and_terminal_code = next_escape_and_terminal_code(backslash & ~this->next_is_escaped); + uint64_t escaped = escape_and_terminal_code ^ (backslash | this->next_is_escaped); + uint64_t escape = escape_and_terminal_code & backslash; + this->next_is_escaped = escape >> 63; + return {escaped, escape}; + } + +private: + static constexpr const uint64_t ODD_BITS = 0xAAAAAAAAAAAAAAAAULL; + + simdjson_really_inline uint64_t next_escaped_without_backslashes() noexcept { + uint64_t escaped = this->next_is_escaped; + this->next_is_escaped = 0; + return escaped; + } + + /** + * Returns a mask of the next escape characters (masking out escaped backslashes), along with + * any non-backslash escape codes. + * + * \n \\n \\\n \\\\n returns: + * \n \ \ \n \ \ + * 11 100 1011 10100 + * + * You are expected to mask out the first bit yourself if the previous block had a trailing + * escape. 
+ * + * & the result with potential_escape to get just the escape characters. + * ^ the result with (potential_escape | first_is_escaped) to get escaped characters. + */ + static simdjson_really_inline uint64_t next_escape_and_terminal_code(uint64_t potential_escape) noexcept { + // If we were to just shift and mask out any odd bits, we'd actually get a *half* right answer: + // any even-aligned backslash runs would be correct! Odd-aligned backslash runs would be + // inverted (\\\ would be 010 instead of 101). + // + // ``` + // string: | ____\\\\_\\\\_____ | + // maybe_escaped | ODD | \ \ \ \ | + // even-aligned ^^^ ^^^^ odd-aligned + // ``` + // + // Taking that into account, our basic strategy is: + // + // 1. Use subtraction to produce a mask with 1's for even-aligned runs and 0's for + // odd-aligned runs. + // 2. XOR all odd bits, which masks out the odd bits in even-aligned runs, and brings IN the + // odd bits in odd-aligned runs. + // 3. & with backslash to clean up any stray bits. + // runs are set to 0, and then XORing with "odd": + // + // | | Mask (shows characters instead of 1's) | Instructions | + // |--------------------------------|----------------------------------------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | + // | | ` even odd even odd odd` | + // | maybe_escaped | ` n \\n \\n \\\_ \\\_ \\` X | 1 (potential_escape << 1) + // | maybe_escaped_and_odd | ` \n_ \\n _ \\\n_ _ \\\__ _\\\_ \\\` | 1 (maybe_escaped | odd) + // | even_series_codes_and_odd | ` n_\\\ _ n_ _\\\\ _ _ ` | 1 (maybe_escaped_and_odd - potential_escape) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 1 (^ odd) + // + + // Escaped characters are characters following an escape. + uint64_t maybe_escaped = potential_escape << 1; + + // To distinguish odd from even escape sequences, therefore, we turn on any *starting* + // escapes that are on an odd byte. (We actually bring in all odd bits, for speed.) 
+ // - Odd runs of backslashes are 0000, and the code at the end ("n" in \n or \\n) is 1. + // - Odd runs of backslashes are 1111, and the code at the end ("n" in \n or \\n) is 0. + // - All other odd bytes are 1, and even bytes are 0. + uint64_t maybe_escaped_and_odd_bits = maybe_escaped | ODD_BITS; + uint64_t even_series_codes_and_odd_bits = maybe_escaped_and_odd_bits - potential_escape; + + // Now we flip all odd bytes back with xor. This: + // - Makes odd runs of backslashes go from 0000 to 1010 + // - Makes even runs of backslashes go from 1111 to 1010 + // - Sets actually-escaped codes to 1 (the n in \n and \\n: \n = 11, \\n = 100) + // - Resets all other bytes to 0 + return even_series_codes_and_odd_bits ^ ODD_BITS; + } +}; + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_escape_scanner.h for ppc64 */ +/* including generic/stage1/json_string_scanner.h for ppc64: #include */ +/* begin file generic/stage1/json_string_scanner.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +struct json_string_block { + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_really_inline json_string_block(uint64_t escaped, uint64_t quote, uint64_t in_string) : + _escaped(escaped), _quote(quote), _in_string(in_string) {} + + // Escaped characters (characters following an escape() character) + simdjson_really_inline 
uint64_t escaped() const { return _escaped; } + // Real (non-backslashed) quotes + simdjson_really_inline uint64_t quote() const { return _quote; } + // Only characters inside the string (not including the quotes) + simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + // Tail of string (everything except the start quote) + simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } + + // escaped characters (backslashed--does not include the hex characters after \u) + uint64_t _escaped; + // real quotes (non-escaped ones) + uint64_t _quote; + // string characters (includes start quote but not end quote) + uint64_t _in_string; +}; + +// Scans blocks for string characters, storing the state necessary to do so +class json_string_scanner { +public: + simdjson_really_inline json_string_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_really_inline error_code finish(); + +private: + // Scans for escape characters + json_escape_scanner escape_scanner{}; + // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). + uint64_t prev_in_string = 0ULL; +}; + +// +// Return a mask of all string characters plus end quotes. +// +// prev_escaped is overflow saying whether the next character is escaped. +// prev_in_string is overflow saying whether we're still in a string. +// +// Backslash sequences outside of quotes will be detected in stage 2. 
+// +simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + const uint64_t backslash = in.eq('\\'); + const uint64_t escaped = escape_scanner.next(backslash).escaped; + const uint64_t quote = in.eq('"') & ~escaped; + + // + // prefix_xor flips on bits inside the string (and flips off the end quote). + // + // Then we xor with prev_in_string: if we were in a string already, its effect is flipped + // (characters inside strings are outside, and characters outside strings are inside). + // + const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; + + // + // Check if we're still in a string at the end of the box so the next block will know + // + prev_in_string = uint64_t(static_cast(in_string) >> 63); + + // Use ^ to turn the beginning quote off, and the end quote on. + + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_string_block(escaped, quote, in_string); +} + +simdjson_really_inline error_code json_string_scanner::finish() { + if (prev_in_string) { + return UNCLOSED_STRING; + } + return SUCCESS; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_string_scanner.h for ppc64 */ +/* including generic/stage1/utf8_lookup4_algorithm.h for ppc64: #include */ +/* begin file generic/stage1/utf8_lookup4_algorithm.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { 
+namespace utf8_validation { + +using namespace simd; + + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { +// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) +// Bit 1 = Too Long (ASCII followed by continuation) +// Bit 2 = Overlong 3-byte +// Bit 4 = Surrogate +// Bit 5 = Overlong 2-byte +// Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1<<6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 + ); + constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . 
+ const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, + CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 + ); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT + ); + return (byte_1_high & byte_1_low & byte_2_high); + } + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, + const simd8 prev_input, const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = must_be_2_3_continuation(prev2, prev3); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; + } + + // + // Return nonzero if there are incomplete multibyte characters at the end of the block: + // e.g. if there is a 4-byte character, but it's 3 bytes from the end. 
+ // + simdjson_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): + // ... 1111____ 111_____ 11______ +#if SIMDJSON_IMPLEMENTATION_ICELAKE + static const uint8_t max_array[64] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#else + static const uint8_t max_array[32] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#endif + const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); + return input.gt_bits(max_value); + } + + struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. + // + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes + // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + // The only problem that can happen at EOF is that a multibyte character is too short + // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // too large in the first of two bytes. 
+ simdjson_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; + } + + simdjson_inline void check_next_input(const simd8x64& input) { + if(simdjson_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 1) + ||(simd8x64::NUM_CHUNKS == 2) + || (simd8x64::NUM_CHUNKS == 4), + "We support one, two or four chunks per 64-byte block."); + SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; + } + } + // do not forget to call check_eof! + simdjson_inline error_code errors() { + return this->error.any_bits_set_anywhere() ? 
error_code::UTF8_ERROR : error_code::SUCCESS; + } + + }; // struct utf8_checker +} // namespace utf8_validation + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H +/* end file generic/stage1/utf8_lookup4_algorithm.h for ppc64 */ +/* including generic/stage1/json_scanner.h for ppc64: #include */ +/* begin file generic/stage1/json_scanner.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +/** + * A block of scanned json, with information on operators and scalars. + * + * We seek to identify pseudo-structural characters. Anything that is inside + * a string must be omitted (hence & ~_string.string_tail()). + * Otherwise, pseudo-structural characters come in two forms. + * 1. We have the structural characters ([,],{,},:, comma). The + * term 'structural character' is from the JSON RFC. + * 2. We have the 'scalar pseudo-structural characters'. + * Scalars are quotes, and any character except structural characters and white space. + * + * To identify the scalar pseudo-structural characters, we must look at what comes + * before them: it must be a space, a quote or a structural characters. + * Starting with simdjson v0.3, we identify them by + * negation: we identify everything that is followed by a non-quote scalar, + * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. 
+ */ +struct json_block { +public: + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + + /** + * The start of structurals. + * In simdjson prior to v0.3, these were called the pseudo-structural characters. + **/ + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + /** All JSON whitespace (i.e. not in a string) */ + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + + // Helpers + + /** Whether the given characters are inside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + /** Whether the given characters are outside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + + // string and escape characters + json_string_block _string; + // whitespace, structural characters ('operators'), scalars + json_character_block _characters; + // whether the previous character was a scalar + uint64_t _follows_potential_nonquote_scalar; +private: + // Potential structurals (i.e. disregarding strings) + + /** + * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". + * They may reside inside a string. 
+ **/ + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + /** + * The start of non-operator runs, like 123, true and "abc". + * It main reside inside a string. + **/ + simdjson_inline uint64_t potential_scalar_start() const noexcept { + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space + // then we know that it is irrelevant structurally. + return _characters.scalar() & ~follows_potential_scalar(); + } + /** + * Whether the given character is immediately after a non-operator like 123, true. + * The characters following a quote are not included. + */ + simdjson_inline uint64_t follows_potential_scalar() const noexcept { + // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character + // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a + // white space. + // It is understood that within quoted region, anything at all could be marked (irrelevant). + return _follows_potential_nonquote_scalar; + } +}; + +/** + * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. + * + * The scanner starts by calculating two distinct things: + * - string characters (taking \" into account) + * - structural characters or 'operators' ([]{},:, comma) + * and scalars (runs of non-operators like 123, true and "abc") + * + * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: + * in particular, the operator/scalar bit will find plenty of things that are actually part of + * strings. When we're done, json_block will fuse the two together by masking out tokens that are + * part of a string. 
+ */ +class json_scanner { +public: + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_inline error_code finish(); + +private: + // Whether the last character of the previous iteration is part of a scalar token + // (anything except whitespace or a structural character/'operator'). + uint64_t prev_scalar = 0ULL; + json_string_scanner string_scanner{}; +}; + + +// +// Check if the current character immediately follows a matching character. +// +// For example, this checks for quotes with backslashes in front of them: +// +// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); +// +simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { + const uint64_t result = match << 1 | overflow; + overflow = match >> 63; + return result; +} + +simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { + json_string_block strings = string_scanner.next(in); + // identifies the white-space and the structural characters + json_character_block characters = json_character_block::classify(in); + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). + // + // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) + // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential + // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we + // may need to add an extra check when parsing strings. + // + // Performance: there are many ways to skin this cat. 
+ const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); + uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_block( + strings,// strings is a function-local object so either it moves or the copy is elided. + characters, + follows_nonquote_scalar + ); +} + +simdjson_inline error_code json_scanner::finish() { + return string_scanner.finish(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H +/* end file generic/stage1/json_scanner.h for ppc64 */ + +// All other declarations +/* including generic/stage1/find_next_document_index.h for ppc64: #include */ +/* begin file generic/stage1/find_next_document_index.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. 
We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! + if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document. 
+ switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } + return 0; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H +/* end file generic/stage1/find_next_document_index.h for ppc64 */ +/* including generic/stage1/json_minifier.h for ppc64: #include */ +/* begin file generic/stage1/json_minifier.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +class json_minifier { +public: + template + static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; + +private: + simdjson_inline json_minifier(uint8_t *_dst) + : dst{_dst} + {} + template + simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + json_scanner scanner{}; + 
uint8_t *dst; +}; + +simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + uint64_t mask = block.whitespace(); + dst += in.compress(mask, dst); +} + +simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { + error_code error = scanner.finish(); + if (error) { dst_len = 0; return error; } + dst_len = dst - dst_start; + return SUCCESS; +} + +template<> +simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + simd::simd8x64 in_2(block_buf+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1); + this->next(in_2, block_2); + reader.advance(); +} + +template<> +simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + json_block block_1 = scanner.next(in_1); + this->next(block_buf, block_1); + reader.advance(); +} + +template +error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { + buf_block_reader reader(buf, len); + json_minifier minifier(dst); + + // Index the first n-1 blocks + while (reader.has_full_block()) { + minifier.step(reader.full_block(), reader); + } + + // Index the last (remainder) block, padded with spaces + uint8_t block[STEP_SIZE]; + size_t remaining_bytes = reader.get_remainder(block); + if (remaining_bytes > 0) { + // We do not want to write directly to the output stream. Rather, we write + // to a local buffer (for safety). + uint8_t out_block[STEP_SIZE]; + uint8_t * const guarded_dst{minifier.dst}; + minifier.dst = out_block; + minifier.step(block, reader); + size_t to_write = minifier.dst - out_block; + // In some cases, we could be enticed to consider the padded spaces + // as part of the string. This is fine as long as we do not write more + // than we consumed. 
+ if(to_write > remaining_bytes) { to_write = remaining_bytes; } + memcpy(guarded_dst, out_block, to_write); + minifier.dst = guarded_dst + to_write; + } + return minifier.finish(dst, dst_len); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H +/* end file generic/stage1/json_minifier.h for ppc64 */ +/* including generic/stage1/json_structural_indexer.h for ppc64: #include */ +/* begin file generic/stage1/json_structural_indexer.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +class bit_indexer { +public: + uint32_t *tail; + + simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + +#if SIMDJSON_PREFER_REVERSE_BITS + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction. 
+ * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. However, + * it tends to minimize the total number of instructions which is + * beneficial. + */ + simdjson_inline void write_index(uint32_t idx, uint64_t& rev_bits, int i) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } +#else + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. + */ + + simdjson_inline void write_index(uint32_t idx, uint64_t& bits, int i) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } +#endif // SIMDJSON_PREFER_REVERSE_BITS + + template + simdjson_inline int write_indexes(uint32_t idx, uint64_t& bits) { + write_index(idx, bits, START); + SIMDJSON_IF_CONSTEXPR (N > 1) { + write_indexes<(N-1>0?START+1:START), (N-1>=0?N-1:1)>(idx, bits); + } + return START+N; + } + + template + simdjson_inline int write_indexes_stepped(uint32_t idx, uint64_t& bits, int cnt) { + write_indexes(idx, bits); + SIMDJSON_IF_CONSTEXPR ((START+STEP) < END) { + if (simdjson_unlikely((START+STEP) < cnt)) { + write_indexes_stepped<(START+STEP(idx, bits, cnt); + } + } + return ((END-START) % STEP) == 0 ? 
END : (END-START) - ((END-START) % STEP) + STEP; + } + + // flatten out values in 'bits' assuming that they are are to have values of idx + // plus their position in the bitvector, and store these indexes at + // base_ptr[base] incrementing base as we go + // will potentially store extra values beyond end of valid bits, so base_ptr + // needs to be large enough to handle this + // + // If the kernel sets SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER, then it + // will provide its own version of the code. +#ifdef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + simdjson_inline void write(uint32_t idx, uint64_t bits); +#else + simdjson_inline void write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; + + int cnt = static_cast(count_ones(bits)); + +#if SIMDJSON_PREFER_REVERSE_BITS + bits = reverse_bits(bits); +#endif +#ifdef SIMDJSON_STRUCTURAL_INDEXER_STEP + static constexpr const int STEP = SIMDJSON_STRUCTURAL_INDEXER_STEP; +#else + static constexpr const int STEP = 4; +#endif + static constexpr const int STEP_UNTIL = 24; + + write_indexes_stepped<0, STEP_UNTIL, STEP>(idx, bits, cnt); + SIMDJSON_IF_CONSTEXPR (STEP_UNTIL < 64) { + if (simdjson_unlikely(STEP_UNTIL < cnt)) { + for (int i=STEP_UNTIL; itail += cnt; + } +#endif // SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +}; + +class json_structural_indexer { +public: + /** + * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. + * + * @param partial Setting the partial parameter to true allows the find_structural_bits to + * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If + * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. 
+ */ + template + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; + +private: + simdjson_inline json_structural_indexer(uint32_t *structural_indexes); + template + simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + + json_scanner scanner{}; + utf8_checker checker{}; + bit_indexer indexer; + uint64_t prev_structurals = 0; + uint64_t unescaped_chars_error = 0; +}; + +simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} + +// Skip the last character if it is partial +simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { + if (simdjson_unlikely(len < 3)) { + switch (len) { + case 2: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + return len; + case 1: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + return len; + case 0: + return len; + } + } + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left + return len; +} + +// +// PERF NOTES: +// We pipe 2 inputs through these stages: +// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load +// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. +// 2. Scan the JSON for critical data: strings, scalars and operators. 
This is the critical path. +// The output of step 1 depends entirely on this information. These functions don't quite use +// up enough CPU: the second half of the functions is highly serial, only using 1 execution core +// at a time. The second input's scans has some dependency on the first ones finishing it, but +// they can make a lot of progress before they need that information. +// 3. Step 1 does not use enough capacity, so we run some extra stuff while we're waiting for that +// to finish: utf-8 checks and generating the output from the last iteration. +// +// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all +// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough +// workout. +// +template +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { + if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. + if(len == 0) { return UTF8_ERROR; } + } + buf_block_reader reader(buf, len); + json_structural_indexer indexer(parser.structural_indexes.get()); + + // Read all but the last block + while (reader.has_full_block()) { + indexer.step(reader.full_block(), reader); + } + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) 
+ uint8_t block[STEP_SIZE]; + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } + indexer.step(block, reader); + return indexer.finish(parser, reader.block_index(), len, partial); +} + +template<> +simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block); + simd::simd8x64 in_2(block+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1, reader.block_index()); + this->next(in_2, block_2, reader.block_index()+64); + reader.advance(); +} + +template<> +simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block); + json_block block_1 = scanner.next(in_1); + this->next(in_1, block_1, reader.block_index()); + reader.advance(); +} + +simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + uint64_t unescaped = in.lteq(0x1F); +#if SIMDJSON_UTF8VALIDATION + checker.check_next_input(in); +#endif + indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser + prev_structurals = block.structural_start(); + unescaped_chars_error |= block.non_quote_inside_string(unescaped); +} + +simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { + // Write out the final iteration's structurals + indexer.write(uint32_t(idx-64), prev_structurals); + error_code error = scanner.finish(); + // We deliberately break down the next expression so that it is + // human readable. + const bool should_we_exit = is_streaming(partial) ? 
+ ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING + : (error != SUCCESS); // if partial is false, we must have SUCCESS + const bool have_unclosed_string = (error == UNCLOSED_STRING); + if (simdjson_unlikely(should_we_exit)) { return error; } + + if (unescaped_chars_error) { + return UNESCAPED_CHARS; + } + parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); + /*** + * The On Demand API requires special padding. + * + * This is related to https://github.com/simdjson/simdjson/issues/906 + * Basically, we want to make sure that if the parsing continues beyond the last (valid) + * structural character, it quickly stops. + * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. + * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing + * continues, then it must be [,] or }. + * Suppose it is ] or }. We backtrack to the first character, what could it be that would + * not trigger an error? It could be ] or } but no, because you can't start a document that way. + * It can't be a comma, a colon or any simple value. So the only way we could continue is + * if the repeated character is [. But if so, the document must start with [. But if the document + * starts with [, it should end with ]. If we enforce that rule, then we would get + * ][[ which is invalid. 
+ * + * This is illustrated with the test array_iterate_unclosed_error() on the following input: + * R"({ "a": [,,)" + **/ + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final + parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); + parser.structural_indexes[parser.n_structural_indexes + 2] = 0; + parser.next_structural_index = 0; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + return EMPTY; + } + if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { + return UNEXPECTED_ERROR; + } + if (partial == stage1_mode::streaming_partial) { + // If we have an unclosed string, then the last structural + // will be the quote and we want to make sure to omit it. + if(have_unclosed_string) { + parser.n_structural_indexes--; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } + } + // We truncate the input to the end of the last complete document (or zero). + auto new_structural_indexes = find_next_document_index(parser); + if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } + + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). 
+ // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. + return EMPTY; + } + } + checker.check_eof(); + return checker.errors(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +// Clear CUSTOM_BIT_INDEXER so other implementations can set it if they need to. 
+#undef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H +/* end file generic/stage1/json_structural_indexer.h for ppc64 */ +/* including generic/stage1/utf8_validator.h for ppc64: #include */ +/* begin file generic/stage1/utf8_validator.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +/** + * Validates that the string is actual UTF-8. + */ +template +bool generic_validate_utf8(const uint8_t * input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return c.errors() == error_code::SUCCESS; +} + +bool generic_validate_utf8(const char * input, size_t length) { + return generic_validate_utf8(reinterpret_cast(input),length); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H +/* end file generic/stage1/utf8_validator.h for ppc64 */ +/* end file generic/stage1/amalgamated.h for ppc64 */ +/* including generic/stage2/amalgamated.h for ppc64: #include */ +/* begin file generic/stage2/amalgamated.h for ppc64 */ +// Stuff other things depend on +/* including generic/stage2/base.h for ppc64: 
#include */ +/* begin file generic/stage2/base.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage2 { + +class json_iterator; +class structural_iterator; +struct tape_builder; +struct tape_writer; + +} // namespace stage2 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_BASE_H +/* end file generic/stage2/base.h for ppc64 */ +/* including generic/stage2/tape_writer.h for ppc64: #include */ +/* begin file generic/stage2/tape_writer.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage2 { + +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; + + /** Write a signed 64-bit value to tape. */ + simdjson_inline void append_s64(int64_t value) noexcept; + + /** Write an unsigned 64-bit value to tape. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + + /** Write a double value to tape. */ + simdjson_inline void append_double(double value) noexcept; + + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). 
+ */ + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; + + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_inline void skip() noexcept; + + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_inline void skip_large_integer() noexcept; + + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_inline void skip_double() noexcept; + + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. + */ + template + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct tape_writer + +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} + +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} + +/** Write a double value to tape. 
*/ +simdjson_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} + +simdjson_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} + +simdjson_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} + +template +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} + +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} + +} // namespace stage2 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H +/* end file generic/stage2/tape_writer.h for ppc64 */ +/* including generic/stage2/logger.h for ppc64: #include */ +/* begin file generic/stage2/logger.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + + +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! 
+namespace simdjson { +namespace ppc64 { +namespace { +namespace logger { + + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; + + static int log_depth; // Not threadsafe. Log only. + + // Helper to turn unprintable or newline characters into spaces + static simdjson_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } + } + + // Print the header and set up log_start + static simdjson_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } + } + + simdjson_unused static simdjson_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } + + // Logs a single line from the stage 2 DOM parser + template + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? 
nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i */ +/* begin file generic/stage2/json_iterator.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage2 { + +class json_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + uint32_t depth{0}; + + /** + * Walk the JSON document. + * + * The visitor receives callbacks when values are encountered. All callbacks pass the iterator as + * the first parameter; some callbacks have other parameters as well: + * + * - visit_document_start() - at the beginning. + * - visit_document_end() - at the end (if things were successful). + * + * - visit_array_start() - at the start `[` of a non-empty array. + * - visit_array_end() - at the end `]` of a non-empty array. + * - visit_empty_array() - when an empty array is encountered. + * + * - visit_object_end() - at the start `]` of a non-empty object. + * - visit_object_start() - at the end `]` of a non-empty object. + * - visit_empty_object() - when an empty object is encountered. 
+ * - visit_key(const uint8_t *key) - when a key in an object field is encountered. key is + * guaranteed to point at the first quote of the string (`"key"`). + * - visit_primitive(const uint8_t *value) - when a value is a string, number, boolean or null. + * - visit_root_primitive(iter, uint8_t *value) - when the top-level value is a string, number, boolean or null. + * + * - increment_count(iter) - each time a value is found in an array or object. + */ + template + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; + + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + + /** + * Look at the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; + + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. 
+ */ + simdjson_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_error(const char *error) const noexcept; + + template + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); + + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); + + // + // Read first value + // + { + auto value = advance(); + + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. 
See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } + } + goto document_end; + +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); + + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { 
log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } + +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; + +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); + +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + } + +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); + + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); + + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters 
at the end of the JSON!"); + return TAPE_ERROR; + } + + return SUCCESS; + +} // walk_document() + +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { +} + +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; +} +simdjson_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); +} + +simdjson_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { + return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} + +simdjson_inline void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} + +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} + +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} + +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, 
value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + // Use the fact that most scalars are going to be either strings or numbers. + if(*value == '"') { + return visitor.visit_string(*this, value); + } else if (((*value - '0') < 10) || (*value == '-')) { + return visitor.visit_number(*this, value); + } + // true, false, null are uncommon. + switch (*value) { + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + default: + log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } +} + +} // namespace stage2 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H +/* end file generic/stage2/json_iterator.h for ppc64 */ +/* including generic/stage2/stringparsing.h for ppc64: #include */ +/* begin file generic/stage2/stringparsing.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses 
+// It is intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace ppc64 { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr, bool allow_replacement) { + // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) + constexpr uint32_t substitution_code_point = 0xfffd; + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this to inside the + // 
multilingual plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + + } + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. 
+ if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +// handle a unicode codepoint using the wobbly convention +// https://simonsapin.github.io/wtf-8/ +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // It is not ideal that this function is nearly identical to handle_unicode_codepoint. + // + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this to inside the + // multilingual plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + uint32_t low_bit = code_point_2 - 0xdc00; + if ((low_bit >> 10) == 0) { + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + } + } + + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. 
There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_ptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. 
*/ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } +} + +simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) { + // It is not ideal that this function is nearly identical to parse_string. + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint_wobbly(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. 
*/ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H +/* end file generic/stage2/stringparsing.h for ppc64 */ +/* including generic/stage2/structural_iterator.h for ppc64: #include */ +/* begin file generic/stage2/structural_iterator.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage2 { + +class structural_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + + // Start a structural + simdjson_inline structural_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { + } + // Get the buffer position of the current structural character + simdjson_inline const uint8_t* current() { + return &buf[*(next_structural-1)]; + } + // Get the current structural character + simdjson_inline char current_char() { + return buf[*(next_structural-1)]; + } + // Get the next structural character without advancing + simdjson_inline char peek_next_char() { + return buf[*next_structural]; + } + simdjson_inline const uint8_t* peek() { + return &buf[*next_structural]; + } + simdjson_inline const uint8_t* advance() { + return &buf[*(next_structural++)]; + } + simdjson_inline char advance_char() { + return 
buf[*(next_structural++)]; + } + simdjson_inline size_t remaining_len() { + return dom_parser.len - *(next_structural-1); + } + + simdjson_inline bool at_end() { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; + } + simdjson_inline bool at_beginning() { + return next_structural == dom_parser.structural_indexes.get(); + } +}; + +} // namespace stage2 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H +/* end file generic/stage2/structural_iterator.h for ppc64 */ +/* including generic/stage2/tape_builder.h for ppc64: #include */ +/* begin file generic/stage2/tape_builder.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage2 { + +struct tape_builder { + template + simdjson_warn_unused static simdjson_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; + + /** Called when a non-empty document starts. */ + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. 
*/ + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; + + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. + */ + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; + + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. + * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. 
+ */ + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + /** Called each time a new field or element in an array or object is found. 
*/ + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; + + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; + + simdjson_inline tape_builder(dom::document &doc) noexcept; + + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; +}; // struct tape_builder + +template +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { + constexpr uint32_t start_tape_index = 0; + 
tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} + +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst, false); // We do not allow replacement when the escape characters are invalid. + if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; + } + on_end_string(dst); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { + // + // We need to make a copy to make sure that the string is space terminated. + // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. 
However, we have no control at this stage + // on how the padding was done. What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. + // + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code 
tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +// private: + +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} + +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. 
+} + +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? 
It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} + +} // namespace stage2 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H +/* end file generic/stage2/tape_builder.h for ppc64 */ +/* end file generic/stage2/amalgamated.h for ppc64 */ + +// +// Stage 1 +// +namespace simdjson { +namespace ppc64 { + +simdjson_warn_unused error_code implementation::create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr& dst +) const noexcept { + dst.reset( new (std::nothrow) dom_parser_implementation() ); + if (!dst) { return MEMALLOC; } + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; + return SUCCESS; +} + +namespace { + +using namespace simd; + +simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { + const simd8 table1(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); + const simd8 table2(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0); + + simd8x64 v( + (in.chunks[0] & 0xf).lookup_16(table1) & (in.chunks[0].shr<4>()).lookup_16(table2), + (in.chunks[1] & 0xf).lookup_16(table1) & (in.chunks[1].shr<4>()).lookup_16(table2), + (in.chunks[2] & 0xf).lookup_16(table1) & (in.chunks[2].shr<4>()).lookup_16(table2), + (in.chunks[3] & 0xf).lookup_16(table1) & (in.chunks[3].shr<4>()).lookup_16(table2) + ); + + uint64_t op = simd8x64( + v.chunks[0].any_bits_set(0x7), + v.chunks[1].any_bits_set(0x7), + v.chunks[2].any_bits_set(0x7), + v.chunks[3].any_bits_set(0x7) + ).to_bitmask(); + + uint64_t whitespace = simd8x64( + v.chunks[0].any_bits_set(0x18), + v.chunks[1].any_bits_set(0x18), + v.chunks[2].any_bits_set(0x18), + v.chunks[3].any_bits_set(0x18) + ).to_bitmask(); + + return { whitespace, op }; +} + +simdjson_inline bool is_ascii(const simd8x64& input) { + // careful: 0x80 is not ascii. 
+ return input.reduce_or().saturating_sub(0x7fu).bits_not_set_anywhere(); +} + +simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1.saturating_sub(0xc0u-1); // Only 11______ will be > 0 + simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. + return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); +} + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0xe0u-0x80); // Only 111_____ will be >= 0x80 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-0x80); // Only 1111____ will be >= 0x80 + return is_third_byte | is_fourth_byte; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +// +// Stage 2 +// + +// +// Implementation-specific overrides +// +namespace simdjson { +namespace ppc64 { + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + return ppc64::stage1::json_minifier::minify<64>(buf, len, dst, dst_len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { + this->buf = _buf; + this->len = _len; + return ppc64::stage1::json_structural_indexer::index<64>(buf, len, *this, streaming); +} + +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return ppc64::stage1::generic_validate_utf8(buf,len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused 
error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst, bool replacement_char) const noexcept { + return ppc64::stringparsing::parse_string(src, dst, replacement_char); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept { + return ppc64::stringparsing::parse_wobbly_string(src, dst); +} + +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { return error; } + return stage2(_doc); +} + +} // namespace ppc64 +} // namespace simdjson + +/* including simdjson/ppc64/end.h: #include */ +/* begin file simdjson/ppc64/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "ppc64" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/ppc64/end.h */ + +#endif // SIMDJSON_SRC_PPC64_CPP +/* end file ppc64.cpp */ +#endif +#if SIMDJSON_IMPLEMENTATION_WESTMERE +/* including westmere.cpp: #include */ +/* begin file westmere.cpp */ +#ifndef SIMDJSON_SRC_WESTMERE_CPP +#define SIMDJSON_SRC_WESTMERE_CPP + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* including simdjson/westmere.h: #include */ +/* begin file simdjson/westmere.h */ +#ifndef SIMDJSON_WESTMERE_H +#define SIMDJSON_WESTMERE_H + +/* including 
simdjson/westmere/begin.h: #include "simdjson/westmere/begin.h" */ +/* begin file simdjson/westmere/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "westmere" */ +#define SIMDJSON_IMPLEMENTATION westmere +/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ +/* begin file simdjson/westmere/base.h */ +#ifndef SIMDJSON_WESTMERE_BASE_H +#define SIMDJSON_WESTMERE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +/** + * Implementation for Westmere (Intel SSE4.2). + */ +namespace westmere { + +class implementation; + +namespace { +namespace simd { + +template struct simd8; +template struct simd8x64; + +} // namespace simd +} // unnamed namespace + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BASE_H +/* end file simdjson/westmere/base.h */ +/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ +/* begin file simdjson/westmere/intrinsics.h */ +#ifndef SIMDJSON_WESTMERE_INTRINSICS_H +#define SIMDJSON_WESTMERE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. 
However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + */ +#include // for _mm_alignr_epi8 +#include // for _mm_clmulepi64_si128 +#endif + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); + +#endif // SIMDJSON_WESTMERE_INTRINSICS_H +/* end file simdjson/westmere/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +SIMDJSON_TARGET_REGION("sse4.2,pclmul,popcnt") +#endif + +/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ +/* begin file simdjson/westmere/bitmanipulation.h */ +#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H +#define SIMDJSON_WESTMERE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). 
+ _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* Clears the least significant set bit of input_num; a zero input yields zero because unsigned subtraction wraps around. */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* Counts the zero bits above the most significant set bit of input_num. */ +/* The Visual Studio branch returns 64 for a zero input; on the __builtin_clzll branch the result is undefined when input_num is zero. */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* Returns the number of set bits in input_num (population count); only the return type differs between the two toolchain branches. */ +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +/* Stores value1 + value2 (modulo 2^64) in *result and returns true when the sum did not fit in 64 bits. */ +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H +/* end file simdjson/westmere/bitmanipulation.h */ +/* including simdjson/westmere/bitmask.h: #include "simdjson/westmere/bitmask.h" */ +/* begin file simdjson/westmere/bitmask.h */ +#ifndef SIMDJSON_WESTMERE_BITMASK_H +#define SIMDJSON_WESTMERE_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only):
#include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +// Output bit i is the XOR of input bits 0..i (bit 0 being the least significant). +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing as a processor supporting avx2 + // but not clmul. + // A carry-less multiplication by an all-ones operand XORs together every + // left-shifted copy of bitmask, so the low 64 bits of the product are the prefix xor. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMASK_H +/* end file simdjson/westmere/bitmask.h */ +/* including simdjson/westmere/numberparsing_defs.h: #include "simdjson/westmere/numberparsing_defs.h" */ +/* begin file simdjson/westmere/numberparsing_defs.h */ +#ifndef SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H +#define SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H + +/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ +/* begin file simdjson/westmere/base.h */ +#ifndef SIMDJSON_WESTMERE_BASE_H +#define SIMDJSON_WESTMERE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +/** + * Implementation for Westmere (Intel SSE4.2).
+ */ +namespace westmere { + +class implementation; + +namespace { +namespace simd { + +template struct simd8; +template struct simd8x64; + +} // namespace simd +} // unnamed namespace + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BASE_H +/* end file simdjson/westmere/base.h */ +/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ +/* begin file simdjson/westmere/intrinsics.h */ +#ifndef SIMDJSON_WESTMERE_INTRINSICS_H +#define SIMDJSON_WESTMERE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. 
However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + */ +#include // for _mm_alignr_epi8 +#include // for _mm_clmulepi64_si128 +#endif + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); + +#endif // SIMDJSON_WESTMERE_INTRINSICS_H +/* end file simdjson/westmere/intrinsics.h */ + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace numberparsing { + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. + const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = 
__umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace westmere +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H +/* end file simdjson/westmere/numberparsing_defs.h */ +/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ +/* begin file simdjson/westmere/simd.h */ +#ifndef SIMDJSON_WESTMERE_SIMD_H +#define SIMDJSON_WESTMERE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace simd { + + template + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } + simdjson_inline Child operator^(const Child other) const { return 
_mm_xor_si128(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + template> + struct base8: base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } + static simdjson_inline simd8 load(const T values[16]) { + return 
_mm_loadu_si128(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. 
+ template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. 
+ __m128i compactmask = + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const 
simd8 other) const { return _mm_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 
other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + // Strict comparisons: the saturating difference is nonzero exactly in the lanes where the + // relation holds, and any_bits_set() turns every nonzero lane into a full mask. + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + // Must use lt_bits (other - *this); gt_bits here would make operator< a duplicate of operator>. + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g.
value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); + uint64_t r1 = this->chunks[1].to_bitmask() ; + uint64_t r2 = this->chunks[2].to_bitmask() ; + uint64_t r3 = this->chunks[3].to_bitmask() ; + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline 
uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H +/* end file simdjson/westmere/simd.h */ +/* including simdjson/westmere/stringparsing_defs.h: #include "simdjson/westmere/stringparsing_defs.h" */ +/* begin file simdjson/westmere/stringparsing_defs.h */ +#ifndef SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H +#define SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H + +/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ +/* begin file simdjson/westmere/bitmanipulation.h */ +#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H +#define SIMDJSON_WESTMERE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. 
+SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). 
+ if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H +/* end file simdjson/westmere/bitmanipulation.h */ +/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ +/* begin file simdjson/westmere/simd.h */ +#ifndef SIMDJSON_WESTMERE_SIMD_H +#define SIMDJSON_WESTMERE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace simd { + + template + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator 
const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + template> + struct base8: base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + simdjson_inline bool any() const { return 
!_mm_testz_si128(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } + static simdjson_inline simd8 load(const T values[16]) { + return _mm_loadu_si128(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). 
+ // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. 
+ __m128i compactmask = + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const 
simd8 other) const { return _mm_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 
other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. 
value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); + uint64_t r1 = this->chunks[1].to_bitmask() ; + uint64_t r2 = this->chunks[2].to_bitmask() ; + uint64_t r3 = this->chunks[3].to_bitmask() ; + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline 
uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H +/* end file simdjson/westmere/simd.h */ + +namespace simdjson { +namespace westmere { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + 16); + v0.store(dst); + v1.store(dst + 16); + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H +/* end file simdjson/westmere/stringparsing_defs.h */ +/* end file simdjson/westmere/begin.h */ +/* including simdjson/generic/amalgamated.h for westmere: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for westmere */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! 
+#endif + +/* including simdjson/generic/base.h for westmere: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for westmere */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */ +/* amalgamation skipped (editor-only): #elif 
SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word +}; + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for westmere */ +/* including simdjson/generic/jsoncharutils.h for westmere: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for westmere */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero 
otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... 
+ //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for westmere */ +/* including simdjson/generic/atomparsing.h for westmere: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for westmere */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace westmere { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. 
+simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for westmere */ +/* including simdjson/generic/dom_parser_implementation.h for westmere: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file 
simdjson/generic/dom_parser_implementation.h for westmere */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { + +// expectation: sizeof(open_container) = 64/8. +struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) 
noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace westmere +} // namespace simdjson + +namespace simdjson { +namespace westmere { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // 
namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for westmere */ +/* including simdjson/generic/implementation_simdjson_result_base.h for westmere: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for westmere */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. 
+ */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. 
This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for westmere */ +/* including simdjson/generic/numberparsing.h for westmere: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for westmere */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace westmere { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) 
(found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." 
+#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. 
Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. 
+ const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. 
Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without + // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product + // is sufficiently accurate, and more computation is not needed. + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? 
-0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. 
+ // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. 
+ + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! 
+ // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! 
+ if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? 
-exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { + if (parse_float_fallback(src, answer)) { + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! 
+ if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer + // reference to it, it would force it to be stored in memory, preventing the compiler from + // picking it apart and putting into registers. i.e. if we pass it as reference, + // it gets slow. + double d; + error_code error = slow_float_parsing(src, &d); + writer.append_double(d); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. 
+ return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; 
} +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' 
== *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. 
+ // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it does not fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. 
+ if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. 
+ // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // The number sits inside a string, so the digits must be followed directly by the closing quote; + // anything else (including '.', 'e' or 'E') is reported as NUMBER_ERROR. + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. 
+ if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Yields INCORRECT_TYPE when there are no digits, more than 19 digits, or the value does not fit, +// and NUMBER_ERROR when a leading zero is followed by more digits. The character after the digits +// is classified through integer_string_finisher. +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. 
+ if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // (an empty range is rejected before the first byte is read) + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// The byte at src[0] is skipped without being examined (presumably the opening quote); +// the digits must end at a '"'. +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // The number sits inside a string, so the digits must be followed directly by the closing quote; + // anything else (including '.', 'e' or 'E') is reported as NUMBER_ERROR. + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
+ 0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +// Returns true when the text at src starts with a minus sign. +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +// Skips an optional minus sign and a run of ASCII digits. Yields NUMBER_ERROR when there is no digit, +// true when the digits are followed by a structural or whitespace character, and false otherwise. +// The digits themselves are not validated (e.g., leading zeros are not rejected here). +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +// Classifies the number at src as signed_integer, unsigned_integer, big_integer or +// floating_point_number by looking at its sign, its digit count and, at the boundaries, a memcmp +// against the decimal text of 2**63 and 2**64. Yields NUMBER_ERROR when no digit follows the optional sign. +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } + // If the number is negative and valid, it must be a signed integer. + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). 
+ static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows. Only the digits left between + // start_digits and p count toward the 19-digit limit (same rule as the unbounded parse_double). + // The scan stops at p so that it never reads at src_end or beyond, even when every decimal digit is '0'. + const uint8_t *start_digits = src + 2; + while ((start_digits != p) && (*start_digits == '0')) { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + // When the digit accumulator may have overflowed, or the exponent is out of the fast-path range, + // skip compute_float_64 and go straight to the fallback parser. + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +// Parses a floating-point number that starts one byte after src: the byte at src[0] is skipped +// without being examined (presumably the opening quote of a string). +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. 
+ // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for westmere */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for westmere: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // 
SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} 
+template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for westmere */ +/* end file simdjson/generic/amalgamated.h for westmere */ +/* including simdjson/westmere/end.h: #include "simdjson/westmere/end.h" */ +/* begin file simdjson/westmere/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +SIMDJSON_UNTARGET_REGION +#endif + +/* undefining SIMDJSON_IMPLEMENTATION from "westmere" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/westmere/end.h */ + +#endif // SIMDJSON_WESTMERE_H +/* end file simdjson/westmere.h */ +/* including simdjson/westmere/implementation.h: #include */ +/* begin file simdjson/westmere/implementation.h */ +#ifndef SIMDJSON_WESTMERE_IMPLEMENTATION_H +#define SIMDJSON_WESTMERE_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use 
SIMDJSON_TARGET_WESTMERE +namespace simdjson { +namespace westmere { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("westmere", "Intel/AMD SSE4.2", internal::instruction_set::SSE42 | internal::instruction_set::PCLMULQDQ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_IMPLEMENTATION_H +/* end file simdjson/westmere/implementation.h */ + +/* including simdjson/westmere/begin.h: #include */ +/* begin file simdjson/westmere/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "westmere" */ +#define SIMDJSON_IMPLEMENTATION westmere +/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ +/* begin file simdjson/westmere/base.h */ +#ifndef SIMDJSON_WESTMERE_BASE_H +#define SIMDJSON_WESTMERE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +/** + * Implementation for Westmere (Intel SSE4.2). 
+ */ +namespace westmere { + +class implementation; + +namespace { +namespace simd { + +template struct simd8; +template struct simd8x64; + +} // namespace simd +} // unnamed namespace + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BASE_H +/* end file simdjson/westmere/base.h */ +/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ +/* begin file simdjson/westmere/intrinsics.h */ +#ifndef SIMDJSON_WESTMERE_INTRINSICS_H +#define SIMDJSON_WESTMERE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. 
However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + */ +#include // for _mm_alignr_epi8 +#include // for _mm_clmulepi64_si128 +#endif + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); + +#endif // SIMDJSON_WESTMERE_INTRINSICS_H +/* end file simdjson/westmere/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +SIMDJSON_TARGET_REGION("sse4.2,pclmul,popcnt") +#endif + +/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ +/* begin file simdjson/westmere/bitmanipulation.h */ +#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H +#define SIMDJSON_WESTMERE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). 
+ _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H +/* end file simdjson/westmere/bitmanipulation.h */ +/* including simdjson/westmere/bitmask.h: #include "simdjson/westmere/bitmask.h" */ +/* begin file simdjson/westmere/bitmask.h */ +#ifndef SIMDJSON_WESTMERE_BITMASK_H +#define SIMDJSON_WESTMERE_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): 
#include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing as a processor supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); // multiplier with every bit set + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); // carry-less multiply of the low 64-bit halves: result bit k is the XOR of bitmask bits 0..k + return _mm_cvtsi128_si64(result); // keep only the low 64 bits of the product +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMASK_H +/* end file simdjson/westmere/bitmask.h */ +/* including simdjson/westmere/numberparsing_defs.h: #include "simdjson/westmere/numberparsing_defs.h" */ +/* begin file simdjson/westmere/numberparsing_defs.h */ +#ifndef SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H +#define SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H + +/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ +/* begin file simdjson/westmere/base.h */ +#ifndef SIMDJSON_WESTMERE_BASE_H +#define SIMDJSON_WESTMERE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +/** + * Implementation for Westmere (Intel SSE4.2). 
+ */ +namespace westmere { + +class implementation; + +namespace { +namespace simd { + +template struct simd8; +template struct simd8x64; + +} // namespace simd +} // unnamed namespace + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BASE_H +/* end file simdjson/westmere/base.h */ +/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ +/* begin file simdjson/westmere/intrinsics.h */ +#ifndef SIMDJSON_WESTMERE_INTRINSICS_H +#define SIMDJSON_WESTMERE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. 
However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + */ +#include // for _mm_alignr_epi8 +#include // for _mm_clmulepi64_si128 +#endif + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); + +#endif // SIMDJSON_WESTMERE_INTRINSICS_H +/* end file simdjson/westmere/intrinsics.h */ + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace numberparsing { + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. + const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = 
__umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace westmere +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H +/* end file simdjson/westmere/numberparsing_defs.h */ +/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ +/* begin file simdjson/westmere/simd.h */ +#ifndef SIMDJSON_WESTMERE_SIMD_H +#define SIMDJSON_WESTMERE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace simd { + + template + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } + simdjson_inline Child operator^(const Child other) const { return 
_mm_xor_si128(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + template> + struct base8: base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } + static simdjson_inline simd8 load(const T values[16]) { + return 
_mm_loadu_si128(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. 
+ template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. 
+ __m128i compactmask = + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const 
simd8 other) const { return _mm_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 
other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } // strictly-less must test lt_bits (other - this, saturated); using gt_bits here made operator< behave exactly like operator> + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. 
value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); + uint64_t r1 = this->chunks[1].to_bitmask() ; + uint64_t r2 = this->chunks[2].to_bitmask() ; + uint64_t r3 = this->chunks[3].to_bitmask() ; + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline 
uint64_t eq(const T m) const { // bit i of the result is set when byte i of the 64-byte block equals m + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { // bit i of the result is set when byte i equals byte i of other + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { // bit i of the result is set when byte i compares <= m + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_SIMD_H +/* end file simdjson/westmere/simd.h */ +/* including simdjson/westmere/stringparsing_defs.h: #include "simdjson/westmere/stringparsing_defs.h" */ +/* begin file simdjson/westmere/stringparsing_defs.h */ +#ifndef SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H +#define SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H + +/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ +/* begin file simdjson/westmere/bitmanipulation.h */ +#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H +#define SIMDJSON_WESTMERE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. 
+SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). 
+ if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H +/* end file simdjson/westmere/bitmanipulation.h */ +/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ +/* begin file simdjson/westmere/simd.h */ +#ifndef SIMDJSON_WESTMERE_SIMD_H +#define SIMDJSON_WESTMERE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace simd { + + template + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator 
const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + template> + struct base8: base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + simdjson_inline bool any() const { return 
!_mm_testz_si128(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } + static simdjson_inline simd8 load(const T values[16]) { + return _mm_loadu_si128(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). 
+ // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. 
+ __m128i compactmask = + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const 
simd8 other) const { return _mm_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 
other) const { return this->saturating_sub(other); }
+    // Same as <, but only guarantees true is nonzero (< guarantees true = -1).
+    // other.saturating_sub(*this) is nonzero exactly in the lanes where other > *this.
+    simdjson_inline simd8<uint8_t> lt_bits(const simd8<uint8_t> other) const { return other.saturating_sub(*this); }
+    // Lane-wise unsigned comparisons; every lane of the result is all-ones (true) or zero (false).
+    // <= and >= are derived from max/min: max(a, b) == b means a <= b, min(a, b) == b means a >= b.
+    simdjson_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return other.max_val(*this) == other; }
+    simdjson_inline simd8<bool> operator>=(const simd8<uint8_t> other) const { return other.min_val(*this) == other; }
+    // > and < normalise the "nonzero means true" result of gt_bits / lt_bits into a proper mask.
+    simdjson_inline simd8<bool> operator>(const simd8<uint8_t> other) const { return this->gt_bits(other).any_bits_set(); }
+    // Must use lt_bits: using gt_bits here would make (a < b) compute (a > b).
+    simdjson_inline simd8<bool> operator<(const simd8<uint8_t> other) const { return this->lt_bits(other).any_bits_set(); }
+
+    // Bit-specific operations
+    // bits_not_set(): lanes that are entirely zero; bits_not_set(bits): lanes with none of `bits` set.
+    simdjson_inline simd8<bool> bits_not_set() const { return *this == uint8_t(0); }
+    simdjson_inline simd8<bool> bits_not_set(simd8<uint8_t> bits) const { return (*this & bits).bits_not_set(); }
+    // any_bits_set(): lanes that are nonzero; any_bits_set(bits): lanes with at least one of `bits` set.
+    simdjson_inline simd8<bool> any_bits_set() const { return ~this->bits_not_set(); }
+    simdjson_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return ~this->bits_not_set(bits); }
+    // True when no byte has its high bit set (movemask collects the top bit of each byte).
+    simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; }
+    // Whole-register tests: a single bool for all 16 bytes rather than a per-lane mask.
+    simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); }
+    simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); }
+    simdjson_inline bool bits_not_set_anywhere(simd8<uint8_t> bits) const { return _mm_testz_si128(*this, bits); }
+    simdjson_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return !bits_not_set_anywhere(bits); }
+    // Per-byte shifts by N bits. The shift is done on 16-bit lanes, so the bits that
+    // cross over from the neighbouring byte are cleared by the 0xFF >> N / 0xFF << N mask.
+    template<int N>
+    simdjson_inline simd8<uint8_t> shr() const { return simd8<uint8_t>(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); }
+    template<int N>
+    simdjson_inline simd8<uint8_t> shl() const { return simd8<uint8_t>(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); }
+    // Get one of the bits and make a bitmask out of it.
+    // e.g.
value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); + uint64_t r1 = this->chunks[1].to_bitmask() ; + uint64_t r2 = this->chunks[2].to_bitmask() ; + uint64_t r3 = this->chunks[3].to_bitmask() ; + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline 
uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H +/* end file simdjson/westmere/simd.h */ + +namespace simdjson { +namespace westmere { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + 16); + v0.store(dst); + v1.store(dst + 16); + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H +/* end file simdjson/westmere/stringparsing_defs.h */ +/* end file simdjson/westmere/begin.h */ +/* including generic/amalgamated.h for westmere: #include */ +/* begin file generic/amalgamated.h for westmere */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_SRC_GENERIC_DEPENDENCIES_H) +#error generic/dependencies.h must be included before generic/amalgamated.h! 
+#endif + +/* including generic/base.h for westmere: #include */ +/* begin file generic/base.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +struct json_character_block; + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_BASE_H +/* end file generic/base.h for westmere */ +/* including generic/dom_parser_implementation.h for westmere: #include */ +/* begin file generic/dom_parser_implementation.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// Interface a dom parser implementation must fulfill +namespace simdjson { +namespace westmere { +namespace { + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3); +simdjson_inline bool is_ascii(const simd8x64& input); + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file generic/dom_parser_implementation.h for westmere */ +/* including generic/json_character_block.h for westmere: #include */ +/* begin file generic/json_character_block.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H + +/* amalgamation skipped 
(editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +struct json_character_block { + static simdjson_inline json_character_block classify(const simd::simd8x64& in); + + simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; } + simdjson_inline uint64_t op() const noexcept { return _op; } + simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } + + uint64_t _whitespace; + uint64_t _op; +}; + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H +/* end file generic/json_character_block.h for westmere */ +/* end file generic/amalgamated.h for westmere */ +/* including generic/stage1/amalgamated.h for westmere: #include */ +/* begin file generic/stage1/amalgamated.h for westmere */ +// Stuff other things depend on +/* including generic/stage1/base.h for westmere: #include */ +/* begin file generic/stage1/base.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +class bit_indexer; +template +struct buf_block_reader; +struct json_block; +class json_minifier; +class json_scanner; +struct json_string_block; +class json_string_scanner; +class json_structural_indexer; + +} // namespace stage1 + +namespace utf8_validation { +struct utf8_checker; +} // namespace utf8_validation + 
+using utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BASE_H +/* end file generic/stage1/base.h for westmere */ +/* including generic/stage1/buf_block_reader.h for westmere: #include */ +/* begin file generic/stage1/buf_block_reader.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +// Walks through a buffer in block-sized increments, loading the last part with spaces +template +struct buf_block_reader { +public: + simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this + * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there + * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. 
+ */ + simdjson_inline size_t get_remainder(uint8_t *dst) const; + simdjson_inline void advance(); +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text_64(const uint8_t *text) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i); i++) { + buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text(const simd8x64& in) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] < ' ') { buf[i] = '_'; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_input_text(const simd8x64& in, uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] <= ' ') { buf[i] = '_'; } + if (!(mask & (size_t(1) << i))) { buf[i] = ' '; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_mask(uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i<64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 
0 : len - STEP_SIZE}, idx{0} {} + +template +simdjson_inline size_t buf_block_reader::block_index() { return idx; } + +template +simdjson_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} + +template +simdjson_inline const uint8_t *buf_block_reader::full_block() const { + return &buf[idx]; +} + +template +simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { + if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. + std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} + +template +simdjson_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H +/* end file generic/stage1/buf_block_reader.h for westmere */ +/* including generic/stage1/json_escape_scanner.h for westmere: #include */ +/* begin file generic/stage1/json_escape_scanner.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +/** + * Scans for escape characters in JSON, taking care with multiple backslashes (\\n vs. \n). + */ +struct json_escape_scanner { + /** The actual escape characters (the backslashes themselves). */ + uint64_t next_is_escaped = 0ULL; + + struct escaped_and_escape { + /** + * Mask of escaped characters. 
+ * + * ``` + * \n \\n \\\n \\\\n \ + * 0100100010100101000 + * n \ \ n \ \ + * ``` + */ + uint64_t escaped; + /** + * Mask of escape characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 1001000101001010001 + * \ \ \ \ \ \ \ + * ``` + */ + uint64_t escape; + }; + + /** + * Get a mask of both escape and escaped characters (the characters following a backslash). + * + * @param potential_escape A mask of the character that can escape others (but could be + * escaped itself). e.g. block.eq('\\') + */ + simdjson_really_inline escaped_and_escape next(uint64_t backslash) noexcept { + +#if !SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT + if (!backslash) { return {next_escaped_without_backslashes(), 0}; } +#endif + + // | | Mask (shows characters instead of 1's) | Depth | Instructions | + // |--------------------------------|----------------------------------------|-------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | | | + // | | ` even odd even odd odd` | | | + // | potential_escape | ` \ \\\ \\\ \\\\ \\\\ \\\` | 1 | 1 (backslash & ~first_is_escaped) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 5 | 5 (next_escape_and_terminal_code()) + // | escaped | `\ \ n \ n \ \ \ \ \ ` X | 6 | 7 (escape_and_terminal_code ^ (potential_escape | first_is_escaped)) + // | escape | ` \ \ \ \ \ \ \ \ \ \` | 6 | 8 (escape_and_terminal_code & backslash) + // | first_is_escaped | `\ ` | 7 (*) | 9 (escape >> 63) () + // (*) this is not needed until the next iteration + uint64_t escape_and_terminal_code = next_escape_and_terminal_code(backslash & ~this->next_is_escaped); + uint64_t escaped = escape_and_terminal_code ^ (backslash | this->next_is_escaped); + uint64_t escape = escape_and_terminal_code & backslash; + this->next_is_escaped = escape >> 63; + return {escaped, escape}; + } + +private: + static constexpr const uint64_t ODD_BITS = 0xAAAAAAAAAAAAAAAAULL; + + simdjson_really_inline uint64_t next_escaped_without_backslashes() noexcept { + uint64_t 
escaped = this->next_is_escaped; + this->next_is_escaped = 0; + return escaped; + } + + /** + * Returns a mask of the next escape characters (masking out escaped backslashes), along with + * any non-backslash escape codes. + * + * \n \\n \\\n \\\\n returns: + * \n \ \ \n \ \ + * 11 100 1011 10100 + * + * You are expected to mask out the first bit yourself if the previous block had a trailing + * escape. + * + * & the result with potential_escape to get just the escape characters. + * ^ the result with (potential_escape | first_is_escaped) to get escaped characters. + */ + static simdjson_really_inline uint64_t next_escape_and_terminal_code(uint64_t potential_escape) noexcept { + // If we were to just shift and mask out any odd bits, we'd actually get a *half* right answer: + // any even-aligned backslash runs would be correct! Odd-aligned backslash runs would be + // inverted (\\\ would be 010 instead of 101). + // + // ``` + // string: | ____\\\\_\\\\_____ | + // maybe_escaped | ODD | \ \ \ \ | + // even-aligned ^^^ ^^^^ odd-aligned + // ``` + // + // Taking that into account, our basic strategy is: + // + // 1. Use subtraction to produce a mask with 1's for even-aligned runs and 0's for + // odd-aligned runs. + // 2. XOR all odd bits, which masks out the odd bits in even-aligned runs, and brings IN the + // odd bits in odd-aligned runs. + // 3. & with backslash to clean up any stray bits. 
+    // runs are set to 0, and then XORing with "odd":
+    //
+    // | | Mask (shows characters instead of 1's) | Instructions |
+    // |--------------------------------|----------------------------------------|---------------------|
+    // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` |
+    // | | ` even odd even odd odd` |
+    // | maybe_escaped | ` n \\n \\n \\\_ \\\_ \\` X | 1 (potential_escape << 1)
+    // | maybe_escaped_and_odd | ` \n_ \\n _ \\\n_ _ \\\__ _\\\_ \\\` | 1 (maybe_escaped | odd)
+    // | even_series_codes_and_odd | ` n_\\\ _ n_ _\\\\ _ _ ` | 1 (maybe_escaped_and_odd - potential_escape)
+    // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 1 (^ odd)
+    //
+
+    // Escaped characters are characters following an escape.
+    uint64_t maybe_escaped = potential_escape << 1;
+
+    // To distinguish odd from even escape sequences, therefore, we turn on any *starting*
+    // escapes that are on an odd byte. (We actually bring in all odd bits, for speed.)
+    // After the subtraction below:
+    // - Odd runs of backslashes are 0000, and the code at the end ("n" in \n or \\n) is 1.
+    // - Even runs of backslashes are 1111, and the code at the end ("n" in \n or \\n) is 0.
+    // - All other odd bytes are 1, and even bytes are 0.
+    uint64_t maybe_escaped_and_odd_bits = maybe_escaped | ODD_BITS;
+    uint64_t even_series_codes_and_odd_bits = maybe_escaped_and_odd_bits - potential_escape;
+
+    // Now we flip all odd bytes back with xor.
This: + // - Makes odd runs of backslashes go from 0000 to 1010 + // - Makes even runs of backslashes go from 1111 to 1010 + // - Sets actually-escaped codes to 1 (the n in \n and \\n: \n = 11, \\n = 100) + // - Resets all other bytes to 0 + return even_series_codes_and_odd_bits ^ ODD_BITS; + } +}; + +} // namespace stage1 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_escape_scanner.h for westmere */ +/* including generic/stage1/json_string_scanner.h for westmere: #include */ +/* begin file generic/stage1/json_string_scanner.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +struct json_string_block { + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_really_inline json_string_block(uint64_t escaped, uint64_t quote, uint64_t in_string) : + _escaped(escaped), _quote(quote), _in_string(in_string) {} + + // Escaped characters (characters following an escape() character) + simdjson_really_inline uint64_t escaped() const { return _escaped; } + // Real (non-backslashed) quotes + simdjson_really_inline uint64_t quote() const { return _quote; } + // Only characters inside the string (not including the quotes) + simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline 
uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + // Return a mask of whether the given characters are outside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + // Tail of string (everything except the start quote) + simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } + + // escaped characters (backslashed--does not include the hex characters after \u) + uint64_t _escaped; + // real quotes (non-escaped ones) + uint64_t _quote; + // string characters (includes start quote but not end quote) + uint64_t _in_string; +}; + +// Scans blocks for string characters, storing the state necessary to do so +class json_string_scanner { +public: + simdjson_really_inline json_string_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_really_inline error_code finish(); + +private: + // Scans for escape characters + json_escape_scanner escape_scanner{}; + // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). + uint64_t prev_in_string = 0ULL; +}; + +// +// Return a mask of all string characters plus end quotes. +// +// prev_escaped is overflow saying whether the next character is escaped. +// prev_in_string is overflow saying whether we're still in a string. +// +// Backslash sequences outside of quotes will be detected in stage 2. +// +simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + const uint64_t backslash = in.eq('\\'); + const uint64_t escaped = escape_scanner.next(backslash).escaped; + const uint64_t quote = in.eq('"') & ~escaped; + + // + // prefix_xor flips on bits inside the string (and flips off the end quote). 
+ // + // Then we xor with prev_in_string: if we were in a string already, its effect is flipped + // (characters inside strings are outside, and characters outside strings are inside). + // + const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; + + // + // Check if we're still in a string at the end of the box so the next block will know + // + prev_in_string = uint64_t(static_cast(in_string) >> 63); + + // Use ^ to turn the beginning quote off, and the end quote on. + + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_string_block(escaped, quote, in_string); +} + +simdjson_really_inline error_code json_string_scanner::finish() { + if (prev_in_string) { + return UNCLOSED_STRING; + } + return SUCCESS; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_string_scanner.h for westmere */ +/* including generic/stage1/utf8_lookup4_algorithm.h for westmere: #include */ +/* begin file generic/stage1/utf8_lookup4_algorithm.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace utf8_validation { + +using namespace simd; + + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { +// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) +// Bit 1 = Too Long (ASCII followed by continuation) +// Bit 2 = Overlong 3-byte +// Bit 4 = Surrogate +// Bit 5 = 
Overlong 2-byte +// Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1<<6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 + ); + constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . 
+ const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, + CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 + ); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT + ); + return (byte_1_high & byte_1_low & byte_2_high); + } + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, + const simd8 prev_input, const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = must_be_2_3_continuation(prev2, prev3); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; + } + + // + // Return nonzero if there are incomplete multibyte characters at the end of the block: + // e.g. if there is a 4-byte character, but it's 3 bytes from the end. 
+ // + simdjson_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): + // ... 1111____ 111_____ 11______ +#if SIMDJSON_IMPLEMENTATION_ICELAKE + static const uint8_t max_array[64] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#else + static const uint8_t max_array[32] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#endif + const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); + return input.gt_bits(max_value); + } + + struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. + // + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes + // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + // The only problem that can happen at EOF is that a multibyte character is too short + // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // too large in the first of two bytes. 
+ simdjson_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; + } + + simdjson_inline void check_next_input(const simd8x64& input) { + if(simdjson_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 1) + ||(simd8x64::NUM_CHUNKS == 2) + || (simd8x64::NUM_CHUNKS == 4), + "We support one, two or four chunks per 64-byte block."); + SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; + } + } + // do not forget to call check_eof! + simdjson_inline error_code errors() { + return this->error.any_bits_set_anywhere() ? 
error_code::UTF8_ERROR : error_code::SUCCESS; + } + + }; // struct utf8_checker +} // namespace utf8_validation + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H +/* end file generic/stage1/utf8_lookup4_algorithm.h for westmere */ +/* including generic/stage1/json_scanner.h for westmere: #include */ +/* begin file generic/stage1/json_scanner.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +/** + * A block of scanned json, with information on operators and scalars. + * + * We seek to identify pseudo-structural characters. Anything that is inside + * a string must be omitted (hence & ~_string.string_tail()). + * Otherwise, pseudo-structural characters come in two forms. + * 1. We have the structural characters ([,],{,},:, comma). The + * term 'structural character' is from the JSON RFC. + * 2. We have the 'scalar pseudo-structural characters'. + * Scalars are quotes, and any character except structural characters and white space. + * + * To identify the scalar pseudo-structural characters, we must look at what comes + * before them: it must be a space, a quote or a structural characters. + * Starting with simdjson v0.3, we identify them by + * negation: we identify everything that is followed by a non-quote scalar, + * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. 
+ */ +struct json_block { +public: + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + + /** + * The start of structurals. + * In simdjson prior to v0.3, these were called the pseudo-structural characters. + **/ + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + /** All JSON whitespace (i.e. not in a string) */ + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + + // Helpers + + /** Whether the given characters are inside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + /** Whether the given characters are outside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + + // string and escape characters + json_string_block _string; + // whitespace, structural characters ('operators'), scalars + json_character_block _characters; + // whether the previous character was a scalar + uint64_t _follows_potential_nonquote_scalar; +private: + // Potential structurals (i.e. disregarding strings) + + /** + * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". + * They may reside inside a string. 
+ **/ + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + /** + * The start of non-operator runs, like 123, true and "abc". + * It may reside inside a string. + **/ + simdjson_inline uint64_t potential_scalar_start() const noexcept { + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space + // then we know that it is irrelevant structurally. + return _characters.scalar() & ~follows_potential_scalar(); + } + /** + * Whether the given character is immediately after a non-operator like 123, true. + * The characters following a quote are not included. + */ + simdjson_inline uint64_t follows_potential_scalar() const noexcept { + // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character + // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a + // white space. + // It is understood that within quoted region, anything at all could be marked (irrelevant). + return _follows_potential_nonquote_scalar; + } +}; + +/** + * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. + * + * The scanner starts by calculating two distinct things: + * - string characters (taking \" into account) + * - structural characters or 'operators' ([]{},:, comma) + * and scalars (runs of non-operators like 123, true and "abc") + * + * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: + * in particular, the operator/scalar bit will find plenty of things that are actually part of + * strings. When we're done, json_block will fuse the two together by masking out tokens that are + * part of a string. 
+ */ +class json_scanner { +public: + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_inline error_code finish(); + +private: + // Whether the last character of the previous iteration is part of a scalar token + // (anything except whitespace or a structural character/'operator'). + uint64_t prev_scalar = 0ULL; + json_string_scanner string_scanner{}; +}; + + +// +// Check if the current character immediately follows a matching character. +// +// For example, this checks for quotes with backslashes in front of them: +// +// const uint64_t backslashed_quote = in.eq('"') & follows(in.eq('\\'), prev_backslash); +// +simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { + const uint64_t result = match << 1 | overflow; + overflow = match >> 63; + return result; +} + +simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { + json_string_block strings = string_scanner.next(in); + // identifies the white-space and the structural characters + json_character_block characters = json_character_block::classify(in); + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // We want follows_nonquote_scalar to mark anything that follows a non-quote scalar (so letters and numbers). + // + // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) + // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential + // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we + // may need to add an extra check when parsing strings. + // + // Performance: there are many ways to skin this cat. 
+ const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); + uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_block( + strings,// strings is a function-local object so either it moves or the copy is elided. + characters, + follows_nonquote_scalar + ); +} + +simdjson_inline error_code json_scanner::finish() { + return string_scanner.finish(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H +/* end file generic/stage1/json_scanner.h for westmere */ + +// All other declarations +/* including generic/stage1/find_next_document_index.h for westmere: #include */ +/* begin file generic/stage1/find_next_document_index.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. 
We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! + if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document. 
+ switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } + return 0; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H +/* end file generic/stage1/find_next_document_index.h for westmere */ +/* including generic/stage1/json_minifier.h for westmere: #include */ +/* begin file generic/stage1/json_minifier.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +class json_minifier { +public: + template + static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; + +private: + simdjson_inline json_minifier(uint8_t *_dst) + : dst{_dst} + {} + template + simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + json_scanner 
scanner{}; + uint8_t *dst; +}; + +simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + uint64_t mask = block.whitespace(); + dst += in.compress(mask, dst); +} + +simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { + error_code error = scanner.finish(); + if (error) { dst_len = 0; return error; } + dst_len = dst - dst_start; + return SUCCESS; +} + +template<> +simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + simd::simd8x64 in_2(block_buf+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1); + this->next(in_2, block_2); + reader.advance(); +} + +template<> +simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + json_block block_1 = scanner.next(in_1); + this->next(block_buf, block_1); + reader.advance(); +} + +template +error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { + buf_block_reader reader(buf, len); + json_minifier minifier(dst); + + // Index the first n-1 blocks + while (reader.has_full_block()) { + minifier.step(reader.full_block(), reader); + } + + // Index the last (remainder) block, padded with spaces + uint8_t block[STEP_SIZE]; + size_t remaining_bytes = reader.get_remainder(block); + if (remaining_bytes > 0) { + // We do not want to write directly to the output stream. Rather, we write + // to a local buffer (for safety). + uint8_t out_block[STEP_SIZE]; + uint8_t * const guarded_dst{minifier.dst}; + minifier.dst = out_block; + minifier.step(block, reader); + size_t to_write = minifier.dst - out_block; + // In some cases, we could be enticed to consider the padded spaces + // as part of the string. This is fine as long as we do not write more + // than we consumed. 
+ if(to_write > remaining_bytes) { to_write = remaining_bytes; } + memcpy(guarded_dst, out_block, to_write); + minifier.dst = guarded_dst + to_write; + } + return minifier.finish(dst, dst_len); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H +/* end file generic/stage1/json_minifier.h for westmere */ +/* including generic/stage1/json_structural_indexer.h for westmere: #include */ +/* begin file generic/stage1/json_structural_indexer.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +class bit_indexer { +public: + uint32_t *tail; + + simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + +#if SIMDJSON_PREFER_REVERSE_BITS + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction. 
+ * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. However, + * it tends to minimize the total number of instructions which is + * beneficial. + */ + simdjson_inline void write_index(uint32_t idx, uint64_t& rev_bits, int i) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } +#else + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. + */ + + simdjson_inline void write_index(uint32_t idx, uint64_t& bits, int i) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } +#endif // SIMDJSON_PREFER_REVERSE_BITS + + template + simdjson_inline int write_indexes(uint32_t idx, uint64_t& bits) { + write_index(idx, bits, START); + SIMDJSON_IF_CONSTEXPR (N > 1) { + write_indexes<(N-1>0?START+1:START), (N-1>=0?N-1:1)>(idx, bits); + } + return START+N; + } + + template + simdjson_inline int write_indexes_stepped(uint32_t idx, uint64_t& bits, int cnt) { + write_indexes(idx, bits); + SIMDJSON_IF_CONSTEXPR ((START+STEP) < END) { + if (simdjson_unlikely((START+STEP) < cnt)) { + write_indexes_stepped<(START+STEP(idx, bits, cnt); + } + } + return ((END-START) % STEP) == 0 ? 
END : (END-START) - ((END-START) % STEP) + STEP; + } + + // flatten out values in 'bits' assuming that they are are to have values of idx + // plus their position in the bitvector, and store these indexes at + // base_ptr[base] incrementing base as we go + // will potentially store extra values beyond end of valid bits, so base_ptr + // needs to be large enough to handle this + // + // If the kernel sets SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER, then it + // will provide its own version of the code. +#ifdef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + simdjson_inline void write(uint32_t idx, uint64_t bits); +#else + simdjson_inline void write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; + + int cnt = static_cast(count_ones(bits)); + +#if SIMDJSON_PREFER_REVERSE_BITS + bits = reverse_bits(bits); +#endif +#ifdef SIMDJSON_STRUCTURAL_INDEXER_STEP + static constexpr const int STEP = SIMDJSON_STRUCTURAL_INDEXER_STEP; +#else + static constexpr const int STEP = 4; +#endif + static constexpr const int STEP_UNTIL = 24; + + write_indexes_stepped<0, STEP_UNTIL, STEP>(idx, bits, cnt); + SIMDJSON_IF_CONSTEXPR (STEP_UNTIL < 64) { + if (simdjson_unlikely(STEP_UNTIL < cnt)) { + for (int i=STEP_UNTIL; itail += cnt; + } +#endif // SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +}; + +class json_structural_indexer { +public: + /** + * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. + * + * @param partial Setting the partial parameter to true allows the find_structural_bits to + * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If + * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. 
+ */ + template + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; + +private: + simdjson_inline json_structural_indexer(uint32_t *structural_indexes); + template + simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + + json_scanner scanner{}; + utf8_checker checker{}; + bit_indexer indexer; + uint64_t prev_structurals = 0; + uint64_t unescaped_chars_error = 0; +}; + +simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} + +// Skip the last character if it is partial +simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { + if (simdjson_unlikely(len < 3)) { + switch (len) { + case 2: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + return len; + case 1: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + return len; + case 0: + return len; + } + } + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left + return len; +} + +// +// PERF NOTES: +// We pipe 2 inputs through these stages: +// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load +// 2 inputs' worth at once so that by the time step 2 is looking for the input, it's available. +// 2. Scan the JSON for critical data: strings, scalars and operators. 
This is the critical path. +// The output of step 1 depends entirely on this information. These functions don't quite use +// up enough CPU: the second half of the functions is highly serial, only using 1 execution core +// at a time. The second input's scans has some dependency on the first ones finishing it, but +// they can make a lot of progress before they need that information. +// 3. Step 1 does not use enough capacity, so we run some extra stuff while we're waiting for that +// to finish: utf-8 checks and generating the output from the last iteration. +// +// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all +// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough +// workout. +// +template +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { + if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. + if(len == 0) { return UTF8_ERROR; } + } + buf_block_reader reader(buf, len); + json_structural_indexer indexer(parser.structural_indexes.get()); + + // Read all but the last block + while (reader.has_full_block()) { + indexer.step(reader.full_block(), reader); + } + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) 
+ uint8_t block[STEP_SIZE]; + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } + indexer.step(block, reader); + return indexer.finish(parser, reader.block_index(), len, partial); +} + +template<> +simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block); + simd::simd8x64 in_2(block+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1, reader.block_index()); + this->next(in_2, block_2, reader.block_index()+64); + reader.advance(); +} + +template<> +simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block); + json_block block_1 = scanner.next(in_1); + this->next(in_1, block_1, reader.block_index()); + reader.advance(); +} + +simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + uint64_t unescaped = in.lteq(0x1F); +#if SIMDJSON_UTF8VALIDATION + checker.check_next_input(in); +#endif + indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser + prev_structurals = block.structural_start(); + unescaped_chars_error |= block.non_quote_inside_string(unescaped); +} + +simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { + // Write out the final iteration's structurals + indexer.write(uint32_t(idx-64), prev_structurals); + error_code error = scanner.finish(); + // We deliberately break down the next expression so that it is + // human readable. + const bool should_we_exit = is_streaming(partial) ? 
+ ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING + : (error != SUCCESS); // if partial is false, we must have SUCCESS + const bool have_unclosed_string = (error == UNCLOSED_STRING); + if (simdjson_unlikely(should_we_exit)) { return error; } + + if (unescaped_chars_error) { + return UNESCAPED_CHARS; + } + parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); + /*** + * The On Demand API requires special padding. + * + * This is related to https://github.com/simdjson/simdjson/issues/906 + * Basically, we want to make sure that if the parsing continues beyond the last (valid) + * structural character, it quickly stops. + * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. + * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing + * continues, then it must be [,] or }. + * Suppose it is ] or }. We backtrack to the first character, what could it be that would + * not trigger an error? It could be ] or } but no, because you can't start a document that way. + * It can't be a comma, a colon or any simple value. So the only way we could continue is + * if the repeated character is [. But if so, the document must start with [. But if the document + * starts with [, it should end with ]. If we enforce that rule, then we would get + * ][[ which is invalid. 
+ * + * This is illustrated with the test array_iterate_unclosed_error() on the following input: + * R"({ "a": [,,)" + **/ + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final + parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); + parser.structural_indexes[parser.n_structural_indexes + 2] = 0; + parser.next_structural_index = 0; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + return EMPTY; + } + if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { + return UNEXPECTED_ERROR; + } + if (partial == stage1_mode::streaming_partial) { + // If we have an unclosed string, then the last structural + // will be the quote and we want to make sure to omit it. + if(have_unclosed_string) { + parser.n_structural_indexes--; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } + } + // We truncate the input to the end of the last complete document (or zero). + auto new_structural_indexes = find_next_document_index(parser); + if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } + + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). 
+ // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. + return EMPTY; + } + } + checker.check_eof(); + return checker.errors(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +// Clear CUSTOM_BIT_INDEXER so other implementations can set it if they need to. 
+#undef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H +/* end file generic/stage1/json_structural_indexer.h for westmere */ +/* including generic/stage1/utf8_validator.h for westmere: #include */ +/* begin file generic/stage1/utf8_validator.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +/** + * Validates that the string is actual UTF-8. + */ +template +bool generic_validate_utf8(const uint8_t * input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return c.errors() == error_code::SUCCESS; +} + +bool generic_validate_utf8(const char * input, size_t length) { + return generic_validate_utf8(reinterpret_cast(input),length); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H +/* end file generic/stage1/utf8_validator.h for westmere */ +/* end file generic/stage1/amalgamated.h for westmere */ +/* including generic/stage2/amalgamated.h for westmere: #include */ +/* begin file generic/stage2/amalgamated.h for westmere */ +// Stuff other things depend on +/* including 
generic/stage2/base.h for westmere: #include */ +/* begin file generic/stage2/base.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace stage2 { + +class json_iterator; +class structural_iterator; +struct tape_builder; +struct tape_writer; + +} // namespace stage2 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_BASE_H +/* end file generic/stage2/base.h for westmere */ +/* including generic/stage2/tape_writer.h for westmere: #include */ +/* begin file generic/stage2/tape_writer.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace westmere { +namespace { +namespace stage2 { + +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; + + /** Write a signed 64-bit value to tape. */ + simdjson_inline void append_s64(int64_t value) noexcept; + + /** Write an unsigned 64-bit value to tape. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + + /** Write a double value to tape. */ + simdjson_inline void append_double(double value) noexcept; + + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). 
+ */ + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; + + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_inline void skip() noexcept; + + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_inline void skip_large_integer() noexcept; + + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_inline void skip_double() noexcept; + + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. + */ + template + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct tape_writer + +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} + +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} + +/** Write a double value to tape. 
*/ +simdjson_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} + +simdjson_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} + +simdjson_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} + +template +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} + +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} + +} // namespace stage2 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H +/* end file generic/stage2/tape_writer.h for westmere */ +/* including generic/stage2/logger.h for westmere: #include */ +/* begin file generic/stage2/logger.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + + +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! 
+namespace simdjson { +namespace westmere { +namespace { +namespace logger { + + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; + + static int log_depth; // Not threadsafe. Log only. + + // Helper to turn unprintable or newline characters into spaces + static simdjson_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } + } + + // Print the header and set up log_start + static simdjson_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } + } + + simdjson_unused static simdjson_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } + + // Logs a single line from the stage 2 DOM parser + template + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? 
nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i */ +/* begin file generic/stage2/json_iterator.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace stage2 { + +class json_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + uint32_t depth{0}; + + /** + * Walk the JSON document. + * + * The visitor receives callbacks when values are encountered. All callbacks pass the iterator as + * the first parameter; some callbacks have other parameters as well: + * + * - visit_document_start() - at the beginning. + * - visit_document_end() - at the end (if things were successful). + * + * - visit_array_start() - at the start `[` of a non-empty array. + * - visit_array_end() - at the end `]` of a non-empty array. + * - visit_empty_array() - when an empty array is encountered. + * + * - visit_object_end() - at the start `]` of a non-empty object. + * - visit_object_start() - at the end `]` of a non-empty object. + * - visit_empty_object() - when an empty object is encountered. 
+ * - visit_key(const uint8_t *key) - when a key in an object field is encountered. key is + * guaranteed to point at the first quote of the string (`"key"`). + * - visit_primitive(const uint8_t *value) - when a value is a string, number, boolean or null. + * - visit_root_primitive(iter, uint8_t *value) - when the top-level value is a string, number, boolean or null. + * + * - increment_count(iter) - each time a value is found in an array or object. + */ + template + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; + + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + + /** + * Look at the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; + + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. 
+ */ + simdjson_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_error(const char *error) const noexcept; + + template + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); + + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); + + // + // Read first value + // + { + auto value = advance(); + + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. 
See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } + } + goto document_end; + +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); + + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { 
log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } + +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; + +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); + +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + } + +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); + + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); + + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters 
at the end of the JSON!"); + return TAPE_ERROR; + } + + return SUCCESS; + +} // walk_document() + +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { +} + +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; +} +simdjson_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); +} + +simdjson_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { + return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} + +simdjson_inline void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} + +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} + +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} + +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, 
value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + // Use the fact that most scalars are going to be either strings or numbers. + if(*value == '"') { + return visitor.visit_string(*this, value); + } else if (((*value - '0') < 10) || (*value == '-')) { + return visitor.visit_number(*this, value); + } + // true, false, null are uncommon. + switch (*value) { + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + default: + log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } +} + +} // namespace stage2 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H +/* end file generic/stage2/json_iterator.h for westmere */ +/* including generic/stage2/stringparsing.h for westmere: #include */ +/* begin file generic/stage2/stringparsing.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every 
implementation uses +// It is intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace westmere { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr, bool allow_replacement) { + // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) + constexpr uint32_t substitution_code_point = 0xfffd; + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this 
to inside the + // multilingual plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + + } + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. 
+ if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +// handle a unicode codepoint using the wobbly convention +// https://simonsapin.github.io/wtf-8/ +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // It is not ideal that this function is nearly identical to handle_unicode_codepoint. + // + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this to inside the + // multilingual plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + uint32_t low_bit = code_point_2 - 0xdc00; + if ((low_bit >> 10) == 0) { + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + } + } + + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. 
There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_ptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. 
*/ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } +} + +simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) { + // It is not ideal that this function is nearly identical to parse_string. + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint_wobbly(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. 
*/ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H +/* end file generic/stage2/stringparsing.h for westmere */ +/* including generic/stage2/structural_iterator.h for westmere: #include */ +/* begin file generic/stage2/structural_iterator.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace stage2 { + +class structural_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + + // Start a structural + simdjson_inline structural_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { + } + // Get the buffer position of the current structural character + simdjson_inline const uint8_t* current() { + return &buf[*(next_structural-1)]; + } + // Get the current structural character + simdjson_inline char current_char() { + return buf[*(next_structural-1)]; + } + // Get the next structural character without advancing + simdjson_inline char peek_next_char() { + return buf[*next_structural]; + } + simdjson_inline const uint8_t* peek() { + return &buf[*next_structural]; + } + simdjson_inline const uint8_t* advance() { + return &buf[*(next_structural++)]; + } + simdjson_inline char 
advance_char() { + return buf[*(next_structural++)]; + } + simdjson_inline size_t remaining_len() { + return dom_parser.len - *(next_structural-1); + } + + simdjson_inline bool at_end() { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; + } + simdjson_inline bool at_beginning() { + return next_structural == dom_parser.structural_indexes.get(); + } +}; + +} // namespace stage2 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H +/* end file generic/stage2/structural_iterator.h for westmere */ +/* including generic/stage2/tape_builder.h for westmere: #include */ +/* begin file generic/stage2/tape_builder.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace westmere { +namespace { +namespace stage2 { + +struct tape_builder { + template + simdjson_warn_unused static simdjson_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; + + /** Called when a non-empty document starts. */ + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. 
*/ + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; + + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. + */ + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; + + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. + * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. 
+ */ + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + /** Called each time a new field or element in an array or object is found. 
*/ + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; + + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; + + simdjson_inline tape_builder(dom::document &doc) noexcept; + + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; +}; // struct tape_builder + +template +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { + constexpr uint32_t start_tape_index = 0; + 
tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} + +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst, false); // We do not allow replacement when the escape characters are invalid. + if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; + } + on_end_string(dst); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { + // + // We need to make a copy to make sure that the string is space terminated. + // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. 
However, we have no control at this stage + // on how the padding was done. What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. + // + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code 
tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +// private: + +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} + +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. 
+} + +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? 
It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} + +} // namespace stage2 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H +/* end file generic/stage2/tape_builder.h for westmere */ +/* end file generic/stage2/amalgamated.h for westmere */ + +// +// Stage 1 +// + +namespace simdjson { +namespace westmere { + +simdjson_warn_unused error_code implementation::create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr& dst +) const noexcept { + dst.reset( new (std::nothrow) dom_parser_implementation() ); + if (!dst) { return MEMALLOC; } + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; + return SUCCESS; +} + +namespace { + +using namespace simd; + +simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { + // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why + // we can't use the generic lookup_16. + auto whitespace_table = simd8::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); + + // The 6 operators (:,[]{}) have these values: + // + // , 2C + // : 3A + // [ 5B + // { 7B + // ] 5D + // } 7D + // + // If you use | 0x20 to turn [ and ] into { and }, the lower 4 bits of each character is unique. + // We exploit this, using a simd 4-bit lookup to tell us which character match against, and then + // match it (against | 0x20). + // + // To prevent recognizing other characters, everything else gets compared with 0, which cannot + // match due to the | 0x20. + // + // NOTE: Due to the | 0x20, this ALSO treats and (control characters 0C and 1A) like , + // and :. This gets caught in stage 2, which checks the actual character to ensure the right + // operators are in the right places. 
+ const auto op_table = simd8::repeat_16( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, ':', '{', // : = 3A, [ = 5B, { = 7B + ',', '}', 0, 0 // , = 2C, ] = 5D, } = 7D + ); + + // We compute whitespace and op separately. If the code later only use one or the + // other, given the fact that all functions are aggressively inlined, we can + // hope that useless computations will be omitted. This is namely case when + // minifying (we only need whitespace). + + + const uint64_t whitespace = in.eq({ + _mm_shuffle_epi8(whitespace_table, in.chunks[0]), + _mm_shuffle_epi8(whitespace_table, in.chunks[1]), + _mm_shuffle_epi8(whitespace_table, in.chunks[2]), + _mm_shuffle_epi8(whitespace_table, in.chunks[3]) + }); + // Turn [ and ] into { and } + const simd8x64 curlified{ + in.chunks[0] | 0x20, + in.chunks[1] | 0x20, + in.chunks[2] | 0x20, + in.chunks[3] | 0x20 + }; + const uint64_t op = curlified.eq({ + _mm_shuffle_epi8(op_table, in.chunks[0]), + _mm_shuffle_epi8(op_table, in.chunks[1]), + _mm_shuffle_epi8(op_table, in.chunks[2]), + _mm_shuffle_epi8(op_table, in.chunks[3]) + }); + return { whitespace, op }; +} + +simdjson_inline bool is_ascii(const simd8x64& input) { + return input.reduce_or().is_ascii(); +} + +simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1.saturating_sub(0xc0u-1); // Only 11______ will be > 0 + simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. 
+ return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); +} + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0xe0u-0x80); // Only 111_____ will be >= 0x80 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-0x80); // Only 1111____ will be >= 0x80 + return is_third_byte | is_fourth_byte; +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +// +// Stage 2 +// + +// +// Implementation-specific overrides +// + +namespace simdjson { +namespace westmere { + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + return westmere::stage1::json_minifier::minify<64>(buf, len, dst, dst_len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { + this->buf = _buf; + this->len = _len; + return westmere::stage1::json_structural_indexer::index<64>(_buf, _len, *this, streaming); +} + +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return westmere::stage1::generic_validate_utf8(buf,len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst, bool replacement_char) const noexcept { + return westmere::stringparsing::parse_string(src, dst, replacement_char); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept { + return westmere::stringparsing::parse_wobbly_string(src, 
dst); +} + +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { return error; } + return stage2(_doc); +} + +} // namespace westmere +} // namespace simdjson + +/* including simdjson/westmere/end.h: #include */ +/* begin file simdjson/westmere/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +SIMDJSON_UNTARGET_REGION +#endif + +/* undefining SIMDJSON_IMPLEMENTATION from "westmere" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/westmere/end.h */ + +#endif // SIMDJSON_SRC_WESTMERE_CPP +/* end file westmere.cpp */ +#endif +#if SIMDJSON_IMPLEMENTATION_LSX +/* including lsx.cpp: #include */ +/* begin file lsx.cpp */ +#ifndef SIMDJSON_SRC_LSX_CPP +#define SIMDJSON_SRC_LSX_CPP + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* including simdjson/lsx.h: #include */ +/* begin file simdjson/lsx.h */ +#ifndef SIMDJSON_LSX_H +#define SIMDJSON_LSX_H + +/* including simdjson/lsx/begin.h: #include "simdjson/lsx/begin.h" */ +/* begin file simdjson/lsx/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "lsx" */ +#define SIMDJSON_IMPLEMENTATION lsx +/* including simdjson/lsx/base.h: #include "simdjson/lsx/base.h" */ +/* begin file simdjson/lsx/base.h */ +#ifndef SIMDJSON_LSX_BASE_H +#define SIMDJSON_LSX_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // 
SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Implementation for LSX. + */ +namespace lsx { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_BASE_H +/* end file simdjson/lsx/base.h */ +/* including simdjson/lsx/intrinsics.h: #include "simdjson/lsx/intrinsics.h" */ +/* begin file simdjson/lsx/intrinsics.h */ +#ifndef SIMDJSON_LSX_INTRINSICS_H +#define SIMDJSON_LSX_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for LoongArch SX"); + +#endif // SIMDJSON_LSX_INTRINSICS_H +/* end file simdjson/lsx/intrinsics.h */ +/* including simdjson/lsx/bitmanipulation.h: #include "simdjson/lsx/bitmanipulation.h" */ +/* begin file simdjson/lsx/bitmanipulation.h */ +#ifndef SIMDJSON_LSX_BITMANIPULATION_H +#define SIMDJSON_LSX_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/bitmask.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. 
+SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { + return __builtin_ctzll(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return __builtin_clzll(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int count_ones(uint64_t input_num) { + return __lsx_vpickve2gr_w(__lsx_vpcnt_d(__m128i(v2u64{input_num, 0})), 0); +} + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +} + +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_BITMANIPULATION_H +/* end file simdjson/lsx/bitmanipulation.h */ +/* including simdjson/lsx/bitmask.h: #include "simdjson/lsx/bitmask.h" */ +/* begin file simdjson/lsx/bitmask.h */ +#ifndef SIMDJSON_LSX_BITMASK_H +#define SIMDJSON_LSX_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. 
+// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif +/* end file simdjson/lsx/bitmask.h */ +/* including simdjson/lsx/numberparsing_defs.h: #include "simdjson/lsx/numberparsing_defs.h" */ +/* begin file simdjson/lsx/numberparsing_defs.h */ +#ifndef SIMDJSON_LSX_NUMBERPARSING_DEFS_H +#define SIMDJSON_LSX_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace lsx { +namespace numberparsing { + +// we don't have appropriate instructions, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); + return answer; +} + +} // namespace numberparsing +} // namespace lsx +} // namespace simdjson + +#define 
SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_LSX_NUMBERPARSING_DEFS_H +/* end file simdjson/lsx/numberparsing_defs.h */ +/* including simdjson/lsx/simd.h: #include "simdjson/lsx/simd.h" */ +/* begin file simdjson/lsx/simd.h */ +#ifndef SIMDJSON_LSX_SIMD_H +#define SIMDJSON_LSX_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + simdjson_inline operator const v16i8&() const { return (v16i8&)this->value; } + simdjson_inline operator v16i8&() { return (v16i8&)this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return __lsx_vor_v(*this, other); } + simdjson_inline Child operator&(const Child other) const { return __lsx_vand_v(*this, other); } + simdjson_inline Child operator^(const Child other) const { return __lsx_vxor_v(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return __lsx_vandn_v(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { 
auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. + template + struct simd8; + + template> + struct base8: base> { + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return __lsx_vseq_b(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return __lsx_vor_v(__lsx_vbsll_v(*this, N), __lsx_vbsrl_v(prev_chunk, 16 - N)); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { + return __lsx_vreplgr2vr_b(uint8_t(-(!!_value))); + } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return __lsx_vpickve2gr_w(__lsx_vmskltz_b(*this), 0); } + simdjson_inline bool any() const { return 0 == __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return __lsx_vreplgr2vr_b(_value); } + static simdjson_inline simd8 zero() { return __lsx_vldi(0); } + static simdjson_inline simd8 load(const T values[16]) { + return __lsx_vld(reinterpret_cast(values), 0); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T 
v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { + return __lsx_vst(*this, reinterpret_cast<__m128i *>(dst), 0); + } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return __lsx_vadd_b(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return __lsx_vsub_b(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return __lsx_vshuf_b(lookup_table, lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by haswell + // lsx do it in 2 steps, first 8 bytes and then second 8 bytes... 
+ uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register. + __m128i shufmask = {int64_t(thintable_epi8[mask1]), int64_t(thintable_epi8[mask2]) + 0x0808080808080808}; + // this is the version "nearly pruned" + __m128i pruned = __lsx_vshuf_b(*this, *this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask + __m128i compactmask = __lsx_vldx(reinterpret_cast(reinterpret_cast(pshufb_combine_table)), pop1 * 8); + __m128i answer = __lsx_vshuf_b(pruned, pruned, compactmask); + __lsx_vst(answer, reinterpret_cast(output), 0); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[16]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8({ + v0, v1, v2, 
v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return __lsx_vmax_b(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lsx_vmin_b(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return __lsx_vslt_b(other, *this); } + simdjson_inline simd8 operator<(const simd8 other) const { return __lsx_vslt_b(*this, other); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(__m128i(v16u8{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + })) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, 
v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return __lsx_vsadd_bu(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return __lsx_vssub_bu(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return __lsx_vmax_bu(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lsx_vmin_bu(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return 0 == __lsx_vpickve2gr_w(__lsx_vmskltz_b(*this), 0); } + simdjson_inline bool bits_not_set_anywhere() const { return 0 == __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool 
bits_not_set_anywhere(simd8 bits) const { + return 0 == __lsx_vpickve2gr_hu(__lsx_vmsknz_b(__lsx_vand_v(*this, bits)), 0); + } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(__lsx_vsrli_b(*this, N)); } + template + simdjson_inline simd8 shl() const { return simd8(__lsx_vslli_b(*this, N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "LSX kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint16_t mask1 = uint16_t(mask); + uint16_t mask2 = uint16_t(mask >> 16); + uint16_t mask3 = uint16_t(mask >> 32); + uint16_t mask4 = uint16_t(mask >> 48); + __m128i zcnt = __lsx_vpcnt_h(__m128i(v2u64{~mask, 0})); + uint64_t zcnt1 = __lsx_vpickve2gr_hu(zcnt, 0); + uint64_t zcnt2 = __lsx_vpickve2gr_hu(zcnt, 1); + uint64_t zcnt3 = __lsx_vpickve2gr_hu(zcnt, 2); + uint64_t zcnt4 = __lsx_vpickve2gr_hu(zcnt, 3); + uint8_t *voutput = reinterpret_cast(output); + // There should be a critical value which processes in scaler is faster. 
+ if (zcnt1) + this->chunks[0].compress(mask1, reinterpret_cast(voutput)); + voutput += zcnt1; + if (zcnt2) + this->chunks[1].compress(mask2, reinterpret_cast(voutput)); + voutput += zcnt2; + if (zcnt3) + this->chunks[2].compress(mask3, reinterpret_cast(voutput)); + voutput += zcnt3; + if (zcnt4) + this->chunks[3].compress(mask4, reinterpret_cast(voutput)); + voutput += zcnt4; + return reinterpret_cast(voutput) - reinterpret_cast(output); + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline uint64_t to_bitmask() const { + __m128i mask1 = __lsx_vmskltz_b(this->chunks[0]); + __m128i mask2 = __lsx_vmskltz_b(this->chunks[1]); + __m128i mask3 = __lsx_vmskltz_b(this->chunks[2]); + __m128i mask4 = __lsx_vmskltz_b(this->chunks[3]); + mask1 = __lsx_vilvl_h(mask2, mask1); + mask2 = __lsx_vilvl_h(mask4, mask3); + return __lsx_vpickve2gr_du(__lsx_vilvl_w(mask2, mask1), 0); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd 
+} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_SIMD_H +/* end file simdjson/lsx/simd.h */ +/* including simdjson/lsx/stringparsing_defs.h: #include "simdjson/lsx/stringparsing_defs.h" */ +/* begin file simdjson/lsx/stringparsing_defs.h */ +#ifndef SIMDJSON_LSX_STRINGPARSING_DEFS_H +#define SIMDJSON_LSX_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on LSX; 
therefore, we + // smash them together into a 64-byte mask and get the bitmask from there. + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_STRINGPARSING_DEFS_H +/* end file simdjson/lsx/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/lsx/begin.h */ +/* including simdjson/generic/amalgamated.h for lsx: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for lsx */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for lsx: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for lsx */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. 
*/ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." 
*/ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word +}; + +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for lsx */ +/* including simdjson/generic/jsoncharutils.h for lsx: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for lsx */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise 
returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. 
+ return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for lsx */ +/* including simdjson/generic/atomparsing.h for lsx: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for lsx */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace lsx { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. 
+// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false");
+// to the compile-time constant 1936482662.
+simdjson_inline uint32_t string_to_uint32(const char* str) {
+  uint32_t packed;
+  std::memcpy(&packed, str, sizeof(packed));
+  return packed;
+}
+
+
+// Compares the four bytes at src with the four bytes of atom.
+// The result is zero exactly when the two 32-bit words are equal.
+// As above, memcpy is used for the load so that no aligned access is assumed;
+// compilers turn it into (about) a single instruction.
+simdjson_warn_unused
+simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) {
+  static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes");
+  uint32_t input_word; // filled through memcpy: unaligned 32-bit loads are undefined in C/C++
+  std::memcpy(&input_word, src, sizeof(input_word));
+  const uint32_t atom_word = string_to_uint32(atom);
+  return input_word ^ atom_word;
+}
+
+// "true" followed by a structural or whitespace character (reads src[4]).
+simdjson_warn_unused
+simdjson_inline bool is_valid_true_atom(const uint8_t *src) {
+  const auto mismatch = str4ncmp(src, "true");
+  const auto bad_terminator = jsoncharutils::is_not_structural_or_whitespace(src[4]);
+  return (mismatch | bad_terminator) == 0;
+}
+
+// Length-aware variant: src[4] is only inspected when len > 4.
+simdjson_warn_unused
+simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) {
+  if (len > 4) { return is_valid_true_atom(src); }
+  if (len == 4) { return str4ncmp(src, "true") == 0; }
+  return false;
+}
+
+// "false" followed by a structural or whitespace character (reads src[5]).
+// Only the last four letters ("alse") are compared; src[0] is not examined here.
+simdjson_warn_unused
+simdjson_inline bool is_valid_false_atom(const uint8_t *src) {
+  const auto mismatch = str4ncmp(src+1, "alse");
+  const auto bad_terminator = jsoncharutils::is_not_structural_or_whitespace(src[5]);
+  return (mismatch | bad_terminator) == 0;
+}
+
+// Length-aware variant: src[5] is only inspected when len > 5.
+simdjson_warn_unused
+simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) {
+  if (len > 5) { return is_valid_false_atom(src); }
+  if (len == 5) { return str4ncmp(src+1, "alse") == 0; }
+  return false;
+}
+
+// "null" followed by a structural or whitespace character (reads src[4]).
+simdjson_warn_unused
+simdjson_inline bool is_valid_null_atom(const uint8_t *src) {
+  const auto mismatch = str4ncmp(src, "null");
+  const auto bad_terminator = jsoncharutils::is_not_structural_or_whitespace(src[4]);
+  return (mismatch | bad_terminator) == 0;
+}
+
+// Length-aware variant: src[4] is only inspected when len > 4.
+simdjson_warn_unused
+simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) {
+  if (len > 4) { return is_valid_null_atom(src); }
+  if (len == 4) { return str4ncmp(src, "null") == 0; }
+  return false;
+}
+
+} // namespace atomparsing
+} // unnamed namespace
+} // namespace lsx
+} // namespace simdjson
+
+#endif // SIMDJSON_GENERIC_ATOMPARSING_H
+/* end file simdjson/generic/atomparsing.h for lsx */
+/* including simdjson/generic/dom_parser_implementation.h for lsx: #include "simdjson/generic/dom_parser_implementation.h" */
+/* begin file simdjson/generic/dom_parser_implementation.h for lsx */
+#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace lsx {
+
+// expectation: sizeof(open_container) = 64/8.
+struct open_container {
+  uint32_t tape_index; // where, on the tape, the scope ([,{) begins
+  uint32_t count; // how many elements in the scope
+}; // struct open_container
+
+static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits");
+
+class dom_parser_implementation final : public internal::dom_parser_implementation {
+public:
+  /** Tape location of each open { or [ */
+  std::unique_ptr<open_container[]> open_containers{};
+  /** Whether each open container is a [ or { */
+  std::unique_ptr<bool[]> is_array{};
+  /** Buffer passed to stage 1 */
+  const uint8_t *buf{};
+  /** Length passed to stage 1 */
+  size_t len{0};
+  /** Document passed to stage 2 */
+  dom::document *doc{};
+
+  inline dom_parser_implementation() noexcept;
+  inline dom_parser_implementation(dom_parser_implementation &&other) noexcept;
+  inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept;
+  dom_parser_implementation(const dom_parser_implementation &) = delete;
+  dom_parser_implementation &operator=(const dom_parser_implementation &) = delete;
+
+  simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final;
+  simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final;
+  simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final;
+  simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final;
+  simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final;
+  simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final;
+  inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final;
+  inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final;
+private:
+  simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity);
+
+};
+
+} // namespace lsx
+} // namespace simdjson
+
+namespace simdjson {
+namespace lsx {
+
+inline dom_parser_implementation::dom_parser_implementation() noexcept = default;
+inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default;
+inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default;
+
+// Leaving these here so they can be inlined if so desired
+//
+// (Re)allocates the stage 1 structural index buffer for documents of up to
+// `capacity` bytes. Returns CAPACITY when the request exceeds
+// SIMDJSON_MAXSIZE_BYTES and MEMALLOC (with _capacity reset to 0) when the
+// allocation fails.
+inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept {
+  if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; }
+  // Stage 1 index output
+  size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7;
+  structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] );
+  if (!structural_indexes) { _capacity = 0; return MEMALLOC; }
+  structural_indexes[0] = 0;
+  n_structural_indexes = 0;
+
+  _capacity = capacity;
+  return SUCCESS;
+}
+
+// (Re)allocates the stage 2 stacks, one entry per nesting level.
+// Returns MEMALLOC (with _max_depth reset to 0) when either allocation fails.
+inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept {
+  // Stage 2 stacks
+  open_containers.reset(new (std::nothrow) open_container[max_depth]);
+  is_array.reset(new (std::nothrow) bool[max_depth]);
+  if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; }
+
+  _max_depth = max_depth;
+  return SUCCESS;
+}
+
+} // namespace lsx
+} // namespace simdjson
+
+#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H
+/* end file simdjson/generic/dom_parser_implementation.h for lsx */
+/* including simdjson/generic/implementation_simdjson_result_base.h for lsx: #include "simdjson/generic/implementation_simdjson_result_base.h" */
+/* begin file simdjson/generic/implementation_simdjson_result_base.h for lsx */
+#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */
+/* amalgamation
skipped (editor-only): #include "simdjson/generic/base.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace lsx {
+
+// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair
+// so we can avoid inlining errors
+// TODO reconcile these!
+/**
+ * The result of a simdjson operation that could fail.
+ *
+ * Gives the option of reading error codes, or throwing an exception by casting to the desired result.
+ *
+ * This is a base class for implementations that want to add functions to the result type for
+ * chaining.
+ *
+ * Override like:
+ *
+ *   struct simdjson_result<T> : public internal::implementation_simdjson_result_base<T> {
+ *     simdjson_result() noexcept : internal::implementation_simdjson_result_base<T>() {}
+ *     simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base<T>(error) {}
+ *     simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base<T>(std::forward<T>(value)) {}
+ *     simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base<T>(value, error) {}
+ *     // Your extra methods here
+ *   }
+ *
+ * Then any method returning simdjson_result<T> will be chainable with your methods.
+ */
+template<typename T>
+struct implementation_simdjson_result_base {
+
+  /**
+   * Create a new empty result with error = UNINITIALIZED.
+   */
+  simdjson_inline implementation_simdjson_result_base() noexcept = default;
+
+  /**
+   * Create a new error result.
+   */
+  simdjson_inline implementation_simdjson_result_base(error_code error) noexcept;
+
+  /**
+   * Create a new successful result.
+   */
+  simdjson_inline implementation_simdjson_result_base(T &&value) noexcept;
+
+  /**
+   * Create a new result with both things (use if you don't want to branch when creating the result).
+   */
+  simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept;
+
+  /**
+   * Move the value and the error to the provided variables.
+   *
+   * @param value The variable to assign the value to. May not be set if there is an error.
+   * @param error The variable to assign the error to. Set to SUCCESS if there is no error.
+   */
+  simdjson_inline void tie(T &value, error_code &error) && noexcept;
+
+  /**
+   * Move the value to the provided variable.
+   *
+   * @param value The variable to assign the value to. May not be set if there is an error.
+   */
+  simdjson_inline error_code get(T &value) && noexcept;
+
+  /**
+   * The error.
+   */
+  simdjson_inline error_code error() const noexcept;
+
+#if SIMDJSON_EXCEPTIONS
+
+  /**
+   * Get the result value.
+   *
+   * @throw simdjson_error if there was an error.
+   */
+  simdjson_inline T& value() & noexcept(false);
+
+  /**
+   * Take the result value (move it).
+   *
+   * @throw simdjson_error if there was an error.
+   */
+  simdjson_inline T&& value() && noexcept(false);
+
+  /**
+   * Take the result value (move it).
+   *
+   * @throw simdjson_error if there was an error.
+   */
+  simdjson_inline T&& take_value() && noexcept(false);
+
+  /**
+   * Cast to the value (will throw on error).
+   *
+   * @throw simdjson_error if there was an error.
+   */
+  simdjson_inline operator T&&() && noexcept(false);
+
+
+#endif // SIMDJSON_EXCEPTIONS
+
+  /**
+   * Get the result value. This function is safe if and only if
+   * the error() method returns a value that evaluates to false.
+   */
+  simdjson_inline const T& value_unsafe() const& noexcept;
+  /**
+   * Get the result value. This function is safe if and only if
+   * the error() method returns a value that evaluates to false.
+   */
+  simdjson_inline T& value_unsafe() & noexcept;
+  /**
+   * Take the result value (move it). This function is safe if and only if
+   * the error() method returns a value that evaluates to false.
+   */
+  simdjson_inline T&& value_unsafe() && noexcept;
+protected:
+  /** users should never directly access first and second. **/
+  T first{}; /** Users should never directly access 'first'. **/
+  error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/
+}; // struct implementation_simdjson_result_base
+
+} // namespace lsx
+} // namespace simdjson
+
+#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H
+/* end file simdjson/generic/implementation_simdjson_result_base.h for lsx */
+/* including simdjson/generic/numberparsing.h for lsx: #include "simdjson/generic/numberparsing.h" */
+/* begin file simdjson/generic/numberparsing.h for lsx */
+#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+#include <limits>
+#include <ostream>
+#include <cstring>
+
+namespace simdjson {
+namespace lsx {
+namespace numberparsing {
+
+#ifdef JSON_TEST_NUMBERS
+#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR)
+#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE)))
+#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE)))
+#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE)))
+#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR)
+#else
+#define INVALID_NUMBER(SRC) (NUMBER_ERROR)
+#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE))
+#define WRITE_UNSIGNED(VALUE, SRC,
WRITER) (WRITER).append_u64((VALUE))
+#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE))
+#define BIGINT_NUMBER(SRC) (BIGINT_ERROR)
+#endif
+
+namespace {
+
+// Convert a mantissa, an exponent and a sign bit into an ieee64 double.
+// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable).
+// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) will be zeroed.
+simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) {
+  double d;
+  mantissa &= ~(1ULL << 52);
+  mantissa |= real_exponent << 52;
+  mantissa |= ((static_cast<uint64_t>(negative)) << 63);
+  // memcpy is the well-defined way to reinterpret the 64-bit pattern as a double
+  std::memcpy(&d, &mantissa, sizeof(d));
+  return d;
+}
+
+// Attempts to compute i * 10^(power) exactly; and if "negative" is
+// true, negate the result.
+// This function will only work in some cases, when it does not work, success is
+// set to false. This should work *most of the time* (like 99% of the time).
+// We assume that power is in the [smallest_power,
+// largest_power] interval: the caller is responsible for this check.
+simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) {
+  // we start with a fast path
+  // It was described in
+  // Clinger WD. How to read floating point numbers accurately.
+  // ACM SIGPLAN Notices. 1990
+#ifndef FLT_EVAL_METHOD
+#error "FLT_EVAL_METHOD should be defined, please include cfloat."
+#endif
+#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
+  // We cannot be certain that x/y is rounded to nearest.
+  if (0 <= power && power <= 22 && i <= 9007199254740991)
+#else
+  if (-22 <= power && power <= 22 && i <= 9007199254740991)
+#endif
+  {
+    // convert the integer into a double. This is lossless since
+    // 0 <= i <= 2^53 - 1.
+    d = double(i);
+    //
+    // The general idea is as follows.
+    // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then
+    // 1) Both s and p can be represented exactly as 64-bit floating-point
+    // values
+    // (binary64).
+    // 2) Because s and p can be represented exactly as floating-point values,
+    // then s * p
+    // and s / p will produce correctly rounded values.
+    //
+    if (power < 0) {
+      d = d / simdjson::internal::power_of_ten[-power];
+    } else {
+      d = d * simdjson::internal::power_of_ten[power];
+    }
+    if (negative) {
+      d = -d;
+    }
+    return true;
+  }
+  // When 22 < power && power <  22 + 16, we could
+  // hope for another, secondary fast path.  It was
+  // described by David M. Gay in  "Correctly rounded
+  // binary-decimal and decimal-binary conversions." (1990)
+  // If you need to compute i * 10^(22 + x) for x < 16,
+  // first compute i * 10^x, if you know that result is exact
+  // (e.g., when i * 10^x < 2^53),
+  // then you can still proceed and do (i * 10^x) * 10^22.
+  // Is this worth your time?
+  // You need  22 < power *and* power <  22 + 16 *and* (i * 10^(x-22) < 2^53)
+  // for this second fast path to work.
+  // If you have 22 < power *and* power <  22 + 16, and then you
+  // optimistically compute "i * 10^(x-22)", there is still a chance that you
+  // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of
+  // this optimization maybe less common than we would like. Source:
+  // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/
+  // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html
+
+  // The fast path has now failed, so we are falling back on the slower path.
+
+  // In the slow path, we need to adjust i so that it is > 1<<63 which is always
+  // possible, except if i == 0, so we handle i == 0 separately.
+  if(i == 0) {
+    d = negative ? -0.0 : 0.0;
+    return true;
+  }
+
+
+  // The exponent is 1024 + 63 + power
+  //     + floor(log(5**power)/log(2)).
+  // The 1024 comes from the ieee64 standard.
+  // The 63 comes from the fact that we use a 64-bit word.
+  //
+  // Computing floor(log(5**power)/log(2)) could be
+  // slow. Instead we use a fast function.
+  //
+  // For power in (-400,350), we have that
+  // (((152170 + 65536) * power ) >> 16);
+  // is equal to
+  //  floor(log(5**power)/log(2)) + power when power >= 0
+  // and it is equal to
+  //  ceil(log(5**-power)/log(2)) + power when power < 0
+  //
+  // The 65536 is (1<<16) and corresponds to
+  // (65536 * power) >> 16 ---> power
+  //
+  // ((152170 * power ) >> 16) is equal to
+  // floor(log(5**power)/log(2))
+  //
+  // Note that this is not magic: 152170/(1<<16) is
+  // approximately equal to log(5)/log(2).
+  // The 1<<16 value is a power of two; we could use a
+  // larger power of 2 if we wanted to.
+  //
+  int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63;
+
+
+  // We want the most significant bit of i to be 1. Shift if needed.
+  int lz = leading_zeroes(i);
+  i <<= lz;
+
+
+  // We are going to need to do some 64-bit arithmetic to get a precise product.
+  // We use a table lookup approach.
+  // It is safe because
+  // power >= smallest_power
+  // and power <= largest_power
+  // We recover the mantissa of the power, it has a leading 1. It is always
+  // rounded down.
+  //
+  // We want the most significant 64 bits of the product. We know
+  // this will be non-zero because the most significant bit of i is
+  // 1.
+  const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power);
+  // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.)
+  //
+  // The full_multiplication function computes the 128-bit product of two 64-bit words
+  // with a returned value of type value128 with a "low component" corresponding to the
+  // 64-bit least significant bits of the product and with a "high component" corresponding
+  // to the 64-bit most significant bits of the product.
+  simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]);
+  // Both i and power_of_five_128[index] have their most significant bit set to 1 which
+  // implies that either the most or the second most significant bit of the product
+  // is 1. We pack values in this manner for efficiency reasons: it maximizes the use
+  // we make of the product. It also makes it easy to reason about the product: there
+  // is 0 or 1 leading zero in the product.
+
+  // Unless the least significant 9 bits of the high (64-bit) part of the full
+  // product are all 1s, then we know that the most significant 55 bits are
+  // exact and no further work is needed. Having 55 bits is necessary because
+  // we need 53 bits for the mantissa but we have to have one rounding bit and
+  // we can waste a bit if the most significant bit of the product is zero.
+  if((firstproduct.high & 0x1FF) == 0x1FF) {
+    // We want to compute i * 5^q, but only care about the top 55 bits at most.
+    // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing
+    // the full computation is wasteful. So we do what is called a "truncated
+    // multiplication".
+    // We take the most significant 64-bits, and we put them in
+    // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q
+    // to the desired approximation using one multiplication. Sometimes it does not suffice.
+    // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and
+    // then we get a better approximation to i * 5^q.
+    //
+    // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat
+    // more complicated.
+    //
+    // There is an extra layer of complexity in that we need more than 55 bits of
+    // accuracy in the round-to-even scenario.
+    //
+    // The full_multiplication function computes the 128-bit product of two 64-bit words
+    // with a returned value of type value128 with a "low component" corresponding to the
+    // 64-bit least significant bits of the product and with a "high component" corresponding
+    // to the 64-bit most significant bits of the product.
+    simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]);
+    firstproduct.low += secondproduct.high;
+    // unsigned wrap-around of the addition above means a carry into the high word
+    if(secondproduct.high > firstproduct.low) { firstproduct.high++; }
+    // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without
+    // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product
+    // is sufficiently accurate, and more computation is not needed.
+  }
+  uint64_t lower = firstproduct.low;
+  uint64_t upper = firstproduct.high;
+  // The final mantissa should be 53 bits with a leading 1.
+  // We shift it so that it occupies 54 bits with a leading 1.
+  ///////
+  uint64_t upperbit = upper >> 63;
+  uint64_t mantissa = upper >> (upperbit + 9);
+  lz += int(1 ^ upperbit);
+
+  // Here we have mantissa < (1<<54).
+  int64_t real_exponent = exponent - lz;
+  if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal?
+    // Here have that real_exponent <= 0 so -real_exponent >= 0
+    if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure.
+      d = negative ? -0.0 : 0.0;
+      return true;
+    }
+    // next line is safe because -real_exponent + 1 < 64
+    mantissa >>= -real_exponent + 1;
+    // Thankfully, we can't have both "round-to-even" and subnormals because
+    // "round-to-even" only occurs for powers close to 0.
+    mantissa += (mantissa & 1); // round up
+    mantissa >>= 1;
+    // There is a weird scenario where we don't have a subnormal but just.
+    // Suppose we start with 2.2250738585072013e-308, we end up
+    // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal
+    // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round
+    // up 0x3fffffffffffff x 2^-1023-53  and once we do, we are no longer
+    // subnormal, but we can only know this after rounding.
+    // So we only declare a subnormal if we are smaller than the threshold.
+    real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1;
+    d = to_double(mantissa, real_exponent, negative);
+    return true;
+  }
+  // We have to round to even. The "to even" part
+  // is only a problem when we are right in between two floats
+  // which we guard against.
+  // If we have lots of trailing zeros, we may fall right between two
+  // floating-point values.
+  //
+  // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54]
+  // times a power of two. That is, it is right between a number with binary significand
+  // m and another number with binary significand m+1; and it must be the case
+  // that it cannot be represented by a float itself.
+  //
+  // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p.
+  // Recall that 10^q = 5^q * 2^q.
+  // When q >= 0, we must have that (2m+1) is divisible by 5^q, so 5^q <= 2^54. We have that
+  //  5^23 <=  2^54 and it is the last power of five to qualify, so q <= 23.
+  // When q<0, we have  w  >=  (2m+1) x 5^{-q}.  We must have that w<2^{64} so
+  // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have
+  // 2^{53} x 5^{-q} < 2^{64}.
+  // Hence we have 5^{-q} < 2^{11} or q>= -4.
+  //
+  // We require lower <= 1 and not lower == 0 because we could not prove
+  // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test.
+  if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) {
+    if((mantissa  << (upperbit + 64 - 53 - 2)) ==  upper) {
+      mantissa &= ~1;             // flip it so that we do not round up
+    }
+  }
+
+  mantissa += mantissa & 1;
+  mantissa >>= 1;
+
+  // Here we have mantissa < (1<<53), unless there was an overflow
+  if (mantissa >= (1ULL << 53)) {
+    //////////
+    // This will happen when parsing values such as 7.2057594037927933e+16
+    ////////
+    mantissa = (1ULL << 52);
+    real_exponent++;
+  }
+  mantissa &= ~(1ULL << 52);
+  // we have to check that real_exponent is in range, otherwise we bail out
+  if (simdjson_unlikely(real_exponent > 2046)) {
+    // We have an infinite value!!! We could actually throw an error here if we could.
+    return false;
+  }
+  d = to_double(mantissa, real_exponent, negative);
+  return true;
+}
+
+// We call a fallback floating-point parser that might be slow. Note
+// it will accept JSON numbers, but the JSON spec. is more restrictive so
+// before you call parse_float_fallback, you need to have validated the input
+// string with the JSON grammar.
+// It will return an error (false) if the parsed number is infinite.
+// The string parsing itself always succeeds. We know that there is at least
+// one digit.
+static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) {
+  *outDouble = simdjson::internal::from_chars(reinterpret_cast<const char *>(ptr));
+  // We do not accept infinite values.
+
+  // Detecting finite values in a portable manner is ridiculously hard, ideally
+  // we would want to do:
+  // return !std::isfinite(*outDouble);
+  // but that mysteriously fails under legacy/old libc++ libraries, see
+  // https://github.com/simdjson/simdjson/issues/1286
+  //
+  // Therefore, fall back to this solution (the extra parens are there
+  // to handle that max may be a macro on windows).
+  return !(*outDouble > (std::numeric_limits<double>::max)() || *outDouble < std::numeric_limits<double>::lowest());
+}
+
+// Same as above, for input delimited by end_ptr (both pointers are forwarded
+// to from_chars).
+static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) {
+  *outDouble = simdjson::internal::from_chars(reinterpret_cast<const char *>(ptr), reinterpret_cast<const char *>(end_ptr));
+  // We do not accept infinite values.
+
+  // Detecting finite values in a portable manner is ridiculously hard, ideally
+  // we would want to do:
+  // return !std::isfinite(*outDouble);
+  // but that mysteriously fails under legacy/old libc++ libraries, see
+  // https://github.com/simdjson/simdjson/issues/1286
+  //
+  // Therefore, fall back to this solution (the extra parens are there
+  // to handle that max may be a macro on windows).
+  return !(*outDouble > (std::numeric_limits<double>::max)() || *outDouble < std::numeric_limits<double>::lowest());
+}
+
+// check quickly whether the next 8 chars are made of digits
+// at a glance, it looks better than Mula's
+// http://0x80.pl/articles/swar-digits-validate.html
+simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) {
+  uint64_t val;
+  // this can read up to 7 bytes beyond the buffer size, but we require
+  // SIMDJSON_PADDING of padding
+  static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7");
+  std::memcpy(&val, chars, 8);
+  // a branchy method might be faster:
+  // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030)
+  //  && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) ==
+  //  0x3030303030303030);
+  return (((val & 0xF0F0F0F0F0F0F0F0) |
+           (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) ==
+          0x3333333333333333);
+}
+
+// If c is an ASCII digit, appends it to i (i = 10 * i + digit) and returns
+// true; otherwise leaves i untouched and returns false.
+template<typename I>
+SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later
+simdjson_inline bool parse_digit(const uint8_t c, I &i) {
+  // a single unsigned comparison covers both c < '0' and c > '9'
+  const uint8_t digit = static_cast<uint8_t>(c - '0');
+  if (digit > 9) {
+    return false;
+  }
+  // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication
+  i = 10 * i + digit; // might overflow, we will handle the overflow later
+  return true;
+}
+
+simdjson_inline bool is_digit(const uint8_t c) {
+  return static_cast<uint8_t>(c - '0') <= 9;
+}
+
+simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) {
+  // we continue with the fiction that we have an integer. If the
+  // floating point number is representable as x * 10^z for some integer
+  // z that fits in 53 bits, then we will be able to convert back the
+  // the integer into a float in a lossless manner.
+  const uint8_t *const first_after_period = p;
+
+#ifdef SIMDJSON_SWAR_NUMBER_PARSING
+#if SIMDJSON_SWAR_NUMBER_PARSING
+  // this helps if we have lots of decimals!
+  // this turns out to be frequent enough.
+  if (is_made_of_eight_digits_fast(p)) {
+    i = i * 100000000 + parse_eight_digits_unrolled(p);
+    p += 8;
+  }
+#endif // SIMDJSON_SWAR_NUMBER_PARSING
+#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING
+  // Unrolling the first digit makes a small difference on some implementations (e.g. westmere)
+  if (parse_digit(*p, i)) { ++p; }
+  while (parse_digit(*p, i)) { p++; }
+  // exponent is minus the number of fractional digits consumed
+  exponent = first_after_period - p;
+  // Decimal without digits (123.) is illegal
+  if (exponent == 0) {
+    return INVALID_NUMBER(src);
+  }
+  return SUCCESS;
+}
+
+simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) {
+  // Exp Sign: -123.456e[-]78
+  bool neg_exp = ('-' == *p);
+  if (neg_exp || '+' == *p) { p++; } // Skip + as well
+
+  // Exponent: -123.456e-[78]
+  auto start_exp = p;
+  int64_t exp_number = 0;
+  while (parse_digit(*p, exp_number)) { ++p; }
+  // It is possible for parse_digit to overflow.
+  // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN.
+  // Thus we *must* check for possible overflow before we negate exp_number.
+
+  // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into
+  // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may
+  // not oblige and may, in fact, generate two distinct paths in any case. It might be
+  // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off
+  // instructions for a simdjson_likely branch, an inconclusive gain.
+
+  // If there were no digits, it's an error.
+  if (simdjson_unlikely(p == start_exp)) {
+    return INVALID_NUMBER(src);
+  }
+  // We have a valid positive exponent in exp_number at this point, except that
+  // it may have overflowed.
+
+  // If there were more than 18 digits, we may have overflowed the integer. We have to do
+  // something!!!!
+  if (simdjson_unlikely(p > start_exp+18)) {
+    // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow
+    while (*start_exp == '0') { start_exp++; }
+    // 19 digits could overflow int64_t and is kind of absurd anyway. We don't
+    // support exponents smaller than -999,999,999,999,999,999 and bigger
+    // than 999,999,999,999,999,999.
+    // We can truncate.
+    // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before
+    // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could
+    // truncate at 324.
+    // Note that there is no reason to fail per se at this point in time.
+    // E.g., 0e999999999999999999999 is a fine number.
+    if (p > start_exp+18) { exp_number = 999999999999999999; }
+  }
+  // At this point, we know that exp_number is a sane, positive, signed integer.
+  // It is <= 999,999,999,999,999,999. As long as 'exponent' is in
+  // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent'
+  // is bounded in magnitude by the size of the JSON input, we are fine in this universe.
+  // To sum it up: the next line should never overflow.
+  exponent += (neg_exp ? -exp_number : exp_number);
+  return SUCCESS;
+}
+
+// Returns true when the (at most max_length) bytes at src form an optional
+// minus sign followed by either a lone '0' or a run of digits, and the token
+// then ends: either the range is exhausted or the next byte is a structural
+// or whitespace character.
+simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) {
+  const uint8_t *const srcend = src + max_length;
+  bool negative = (*src == '-'); // we can always read at least one character after the '-'
+  const uint8_t *p = src + uint8_t(negative);
+  if(p == srcend) { return false; }
+  if(*p == '0') {
+    ++p;
+    if(p == srcend) { return true; }
+    if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; }
+    return true;
+  }
+  while(p != srcend && is_digit(*p)) { ++p; }
+  if(p == srcend) { return true; }
+  if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; }
+  return true;
+}
+
+simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) {
+  // It is possible that the integer had an overflow.
+  // We have to handle the case where we have 0.0000somenumber.
+  const uint8_t *start = start_digits;
+  while ((*start == '0') || (*start == '.')) { ++start; }
+  // we over-decrement by one when there is a '.'
+  return digit_count - size_t(start - start_digits);
+}
+
+} // unnamed namespace
+
+/** @private */
+static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) {
+  if (parse_float_fallback(src, answer)) {
+    return SUCCESS;
+  }
+  return INVALID_NUMBER(src);
+}
+
+/** @private */
+template<typename W>
+simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) {
+  // If we frequently had to deal with long strings of digits,
+  // we could extend our code by using a 128-bit integer instead
+  // of a 64-bit integer. However, this is uncommon in practice.
+  //
+  // 9999999999999999999 < 2**64 so we can accommodate 19 digits.
+  // If we have a decimal separator, then digit_count - 1 is the number of digits, but we
+  // may not have a decimal separator!
+  if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) {
+    // Ok, chances are good that we had an overflow!
+    // this is almost never going to get called!!!
+    // we start anew, going slowly!!!
+    // This will happen in the following examples:
+    // 10000000000000000000000000000000000000000000e+308
+    // 3.1415926535897932384626433832795028841971693993751
+    //
+    // NOTE: We do not pass a reference to the writer to slow_float_parsing. If we passed our writer
+    // reference to it, it would force it to be stored in memory, preventing the compiler from
+    // picking it apart and putting into registers. i.e. if we pass it as reference,
+    // it gets slow.
+    double d;
+    error_code error = slow_float_parsing(src, &d);
+    writer.append_double(d);
+    return error;
+  }
+  // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other
+  // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331
+  // To future reader: we'd love if someone found a better way, or at least could explain this result!
+  if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) {
+    //
+    // Important: smallest_power is such that it leads to a zero value.
+    // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero
+    // so something x 10^-343 goes to zero, but not so with  something x 10^-342.
+    static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough");
+    //
+    if((exponent < simdjson::internal::smallest_power) || (i == 0)) {
+      // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero
+      WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer);
+      return SUCCESS;
+    } else { // (exponent > largest_power) and (i != 0)
+      // We have, for sure, an infinite value and simdjson refuses to parse infinite values.
+      return INVALID_NUMBER(src);
+    }
+  }
+  double d;
+  if (!compute_float_64(exponent, i, negative, d)) {
+    // we are almost never going to get here.
+    if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); }
+  }
+  WRITE_DOUBLE(d, src, writer);
+  return SUCCESS;
+}
+
+// parse the number at src
+// define JSON_TEST_NUMBERS for unit testing
+//
+// It is assumed that the number is followed by a structural ({,},],[) character
+// or a white space character. If that is not the case (e.g., when the JSON
+// document is made of a single number), then it is necessary to copy the
+// content and append a space before calling this function.
+//
+// Our objective is accurate parsing (ULP of 0) at high speed.
+template<typename W>
+simdjson_inline error_code parse_number(const uint8_t *const src, W &writer);
+
+// for performance analysis, it is sometimes useful to skip parsing
+#ifdef SIMDJSON_SKIPNUMBERPARSING
+
+template<typename W>
+simdjson_inline error_code parse_number(const uint8_t *const, W &writer) {
+  writer.append_s64(0);        // always write zero
+  return SUCCESS;              // always succeeds
+}
+
+simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned(const uint8_t * const src) noexcept { return 0; }
+simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer(const uint8_t * const src) noexcept { return 0; }
+simdjson_unused simdjson_inline simdjson_result<double> parse_double(const uint8_t * const src) noexcept { return 0; }
+simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; }
+simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer_in_string(const uint8_t * const src) noexcept { return 0; }
+simdjson_unused simdjson_inline simdjson_result<double> parse_double_in_string(const uint8_t * const src) noexcept { return 0; }
+simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; }
+simdjson_unused simdjson_inline simdjson_result<bool> is_integer(const uint8_t * src) noexcept { return false;
} +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' 
== *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. 
+ // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it does not fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. 
+ if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. 
+ // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. 
+ if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. 
+ if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } + // If the number is negative and valid, it must be a signed integer. + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). 
+ static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. 
+ // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for lsx */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for lsx: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // 
SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} 
+template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for lsx */ +/* end file simdjson/generic/amalgamated.h for lsx */ +/* including simdjson/lsx/end.h: #include "simdjson/lsx/end.h" */ +/* begin file simdjson/lsx/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "lsx" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/lsx/end.h */ + +#endif // SIMDJSON_LSX_H +/* end file simdjson/lsx.h */ +/* including simdjson/lsx/implementation.h: #include */ +/* begin file simdjson/lsx/implementation.h */ +#ifndef SIMDJSON_LSX_IMPLEMENTATION_H +#define SIMDJSON_LSX_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("lsx", 
"LoongArch SX", internal::instruction_set::LSX) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_IMPLEMENTATION_H +/* end file simdjson/lsx/implementation.h */ + +/* including simdjson/lsx/begin.h: #include */ +/* begin file simdjson/lsx/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "lsx" */ +#define SIMDJSON_IMPLEMENTATION lsx +/* including simdjson/lsx/base.h: #include "simdjson/lsx/base.h" */ +/* begin file simdjson/lsx/base.h */ +#ifndef SIMDJSON_LSX_BASE_H +#define SIMDJSON_LSX_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Implementation for LSX. 
+ */ +namespace lsx { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_BASE_H +/* end file simdjson/lsx/base.h */ +/* including simdjson/lsx/intrinsics.h: #include "simdjson/lsx/intrinsics.h" */ +/* begin file simdjson/lsx/intrinsics.h */ +#ifndef SIMDJSON_LSX_INTRINSICS_H +#define SIMDJSON_LSX_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for LoongArch SX"); + +#endif // SIMDJSON_LSX_INTRINSICS_H +/* end file simdjson/lsx/intrinsics.h */ +/* including simdjson/lsx/bitmanipulation.h: #include "simdjson/lsx/bitmanipulation.h" */ +/* begin file simdjson/lsx/bitmanipulation.h */ +#ifndef SIMDJSON_LSX_BITMANIPULATION_H +#define SIMDJSON_LSX_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/bitmask.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. 
+// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { + return __builtin_ctzll(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return __builtin_clzll(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int count_ones(uint64_t input_num) { + return __lsx_vpickve2gr_w(__lsx_vpcnt_d(__m128i(v2u64{input_num, 0})), 0); +} + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +} + +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_BITMANIPULATION_H +/* end file simdjson/lsx/bitmanipulation.h */ +/* including simdjson/lsx/bitmask.h: #include "simdjson/lsx/bitmask.h" */ +/* begin file simdjson/lsx/bitmask.h */ +#ifndef SIMDJSON_LSX_BITMASK_H +#define SIMDJSON_LSX_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. 
+// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif +/* end file simdjson/lsx/bitmask.h */ +/* including simdjson/lsx/numberparsing_defs.h: #include "simdjson/lsx/numberparsing_defs.h" */ +/* begin file simdjson/lsx/numberparsing_defs.h */ +#ifndef SIMDJSON_LSX_NUMBERPARSING_DEFS_H +#define SIMDJSON_LSX_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace lsx { +namespace numberparsing { + +// we don't have appropriate instructions, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); + return answer; +} + +} // namespace numberparsing +} // namespace lsx +} // namespace simdjson + +#define 
SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_LSX_NUMBERPARSING_DEFS_H +/* end file simdjson/lsx/numberparsing_defs.h */ +/* including simdjson/lsx/simd.h: #include "simdjson/lsx/simd.h" */ +/* begin file simdjson/lsx/simd.h */ +#ifndef SIMDJSON_LSX_SIMD_H +#define SIMDJSON_LSX_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + simdjson_inline operator const v16i8&() const { return (v16i8&)this->value; } + simdjson_inline operator v16i8&() { return (v16i8&)this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return __lsx_vor_v(*this, other); } + simdjson_inline Child operator&(const Child other) const { return __lsx_vand_v(*this, other); } + simdjson_inline Child operator^(const Child other) const { return __lsx_vxor_v(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return __lsx_vandn_v(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { 
auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. + template + struct simd8; + + template> + struct base8: base> { + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return __lsx_vseq_b(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return __lsx_vor_v(__lsx_vbsll_v(*this, N), __lsx_vbsrl_v(prev_chunk, 16 - N)); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { + return __lsx_vreplgr2vr_b(uint8_t(-(!!_value))); + } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return __lsx_vpickve2gr_w(__lsx_vmskltz_b(*this), 0); } + simdjson_inline bool any() const { return 0 == __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return __lsx_vreplgr2vr_b(_value); } + static simdjson_inline simd8 zero() { return __lsx_vldi(0); } + static simdjson_inline simd8 load(const T values[16]) { + return __lsx_vld(reinterpret_cast(values), 0); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T 
v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { + return __lsx_vst(*this, reinterpret_cast<__m128i *>(dst), 0); + } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return __lsx_vadd_b(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return __lsx_vsub_b(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return __lsx_vshuf_b(lookup_table, lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by haswell + // lsx do it in 2 steps, first 8 bytes and then second 8 bytes... 
+ uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register. + __m128i shufmask = {int64_t(thintable_epi8[mask1]), int64_t(thintable_epi8[mask2]) + 0x0808080808080808}; + // this is the version "nearly pruned" + __m128i pruned = __lsx_vshuf_b(*this, *this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask + __m128i compactmask = __lsx_vldx(reinterpret_cast(reinterpret_cast(pshufb_combine_table)), pop1 * 8); + __m128i answer = __lsx_vshuf_b(pruned, pruned, compactmask); + __lsx_vst(answer, reinterpret_cast(output), 0); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[16]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8({ + v0, v1, v2, 
v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return __lsx_vmax_b(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lsx_vmin_b(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return __lsx_vslt_b(other, *this); } + simdjson_inline simd8 operator<(const simd8 other) const { return __lsx_vslt_b(*this, other); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(__m128i(v16u8{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + })) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, 
v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return __lsx_vsadd_bu(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return __lsx_vssub_bu(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return __lsx_vmax_bu(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lsx_vmin_bu(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return 0 == __lsx_vpickve2gr_w(__lsx_vmskltz_b(*this), 0); } + simdjson_inline bool bits_not_set_anywhere() const { return 0 == __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool 
bits_not_set_anywhere(simd8 bits) const { + return 0 == __lsx_vpickve2gr_hu(__lsx_vmsknz_b(__lsx_vand_v(*this, bits)), 0); + } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(__lsx_vsrli_b(*this, N)); } + template + simdjson_inline simd8 shl() const { return simd8(__lsx_vslli_b(*this, N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "LSX kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint16_t mask1 = uint16_t(mask); + uint16_t mask2 = uint16_t(mask >> 16); + uint16_t mask3 = uint16_t(mask >> 32); + uint16_t mask4 = uint16_t(mask >> 48); + __m128i zcnt = __lsx_vpcnt_h(__m128i(v2u64{~mask, 0})); + uint64_t zcnt1 = __lsx_vpickve2gr_hu(zcnt, 0); + uint64_t zcnt2 = __lsx_vpickve2gr_hu(zcnt, 1); + uint64_t zcnt3 = __lsx_vpickve2gr_hu(zcnt, 2); + uint64_t zcnt4 = __lsx_vpickve2gr_hu(zcnt, 3); + uint8_t *voutput = reinterpret_cast(output); + // There should be a critical value which processes in scaler is faster. 
+ if (zcnt1) + this->chunks[0].compress(mask1, reinterpret_cast(voutput)); + voutput += zcnt1; + if (zcnt2) + this->chunks[1].compress(mask2, reinterpret_cast(voutput)); + voutput += zcnt2; + if (zcnt3) + this->chunks[2].compress(mask3, reinterpret_cast(voutput)); + voutput += zcnt3; + if (zcnt4) + this->chunks[3].compress(mask4, reinterpret_cast(voutput)); + voutput += zcnt4; + return reinterpret_cast(voutput) - reinterpret_cast(output); + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline uint64_t to_bitmask() const { + __m128i mask1 = __lsx_vmskltz_b(this->chunks[0]); + __m128i mask2 = __lsx_vmskltz_b(this->chunks[1]); + __m128i mask3 = __lsx_vmskltz_b(this->chunks[2]); + __m128i mask4 = __lsx_vmskltz_b(this->chunks[3]); + mask1 = __lsx_vilvl_h(mask2, mask1); + mask2 = __lsx_vilvl_h(mask4, mask3); + return __lsx_vpickve2gr_du(__lsx_vilvl_w(mask2, mask1), 0); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd 
+} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_SIMD_H +/* end file simdjson/lsx/simd.h */ +/* including simdjson/lsx/stringparsing_defs.h: #include "simdjson/lsx/stringparsing_defs.h" */ +/* begin file simdjson/lsx/stringparsing_defs.h */ +#ifndef SIMDJSON_LSX_STRINGPARSING_DEFS_H +#define SIMDJSON_LSX_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on LSX; 
therefore, we + // smash them together into a 64-byte mask and get the bitmask from there. + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_STRINGPARSING_DEFS_H +/* end file simdjson/lsx/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/lsx/begin.h */ +/* including generic/amalgamated.h for lsx: #include */ +/* begin file generic/amalgamated.h for lsx */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_SRC_GENERIC_DEPENDENCIES_H) +#error generic/dependencies.h must be included before generic/amalgamated.h! +#endif + +/* including generic/base.h for lsx: #include */ +/* begin file generic/base.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { + +struct json_character_block; + +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_BASE_H +/* end file generic/base.h for lsx */ +/* including generic/dom_parser_implementation.h for lsx: #include */ +/* begin file generic/dom_parser_implementation.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped 
(editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// Interface a dom parser implementation must fulfill +namespace simdjson { +namespace lsx { +namespace { + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3); +simdjson_inline bool is_ascii(const simd8x64& input); + +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file generic/dom_parser_implementation.h for lsx */ +/* including generic/json_character_block.h for lsx: #include */ +/* begin file generic/json_character_block.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { + +struct json_character_block { + static simdjson_inline json_character_block classify(const simd::simd8x64& in); + + simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; } + simdjson_inline uint64_t op() const noexcept { return _op; } + simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } + + uint64_t _whitespace; + uint64_t _op; +}; + +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H +/* end file generic/json_character_block.h for lsx */ +/* end file generic/amalgamated.h for lsx */ +/* including generic/stage1/amalgamated.h for lsx: #include */ +/* begin file generic/stage1/amalgamated.h for lsx */ +// Stuff other things depend on +/* including generic/stage1/base.h for lsx: #include */ +/* begin file generic/stage1/base.h for lsx */ +#ifndef 
SIMDJSON_SRC_GENERIC_STAGE1_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { +namespace stage1 { + +class bit_indexer; +template +struct buf_block_reader; +struct json_block; +class json_minifier; +class json_scanner; +struct json_string_block; +class json_string_scanner; +class json_structural_indexer; + +} // namespace stage1 + +namespace utf8_validation { +struct utf8_checker; +} // namespace utf8_validation + +using utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BASE_H +/* end file generic/stage1/base.h for lsx */ +/* including generic/stage1/buf_block_reader.h for lsx: #include */ +/* begin file generic/stage1/buf_block_reader.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace lsx { +namespace { +namespace stage1 { + +// Walks through a buffer in block-sized increments, loading the last part with spaces +template +struct buf_block_reader { +public: + simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. 
+ * + * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this + * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there + * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. + */ + simdjson_inline size_t get_remainder(uint8_t *dst) const; + simdjson_inline void advance(); +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text_64(const uint8_t *text) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i); i++) { + buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text(const simd8x64& in) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] < ' ') { buf[i] = '_'; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_input_text(const simd8x64& in, uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] <= ' ') { buf[i] = '_'; } + if (!(mask & (size_t(1) << i))) { buf[i] = ' '; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_mask(uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i<64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 
0 : len - STEP_SIZE}, idx{0} {} + +template +simdjson_inline size_t buf_block_reader::block_index() { return idx; } + +template +simdjson_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} + +template +simdjson_inline const uint8_t *buf_block_reader::full_block() const { + return &buf[idx]; +} + +template +simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { + if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. + std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} + +template +simdjson_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H +/* end file generic/stage1/buf_block_reader.h for lsx */ +/* including generic/stage1/json_escape_scanner.h for lsx: #include */ +/* begin file generic/stage1/json_escape_scanner.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { +namespace stage1 { + +/** + * Scans for escape characters in JSON, taking care with multiple backslashes (\\n vs. \n). + */ +struct json_escape_scanner { + /** The actual escape characters (the backslashes themselves). */ + uint64_t next_is_escaped = 0ULL; + + struct escaped_and_escape { + /** + * Mask of escaped characters. 
+ * + * ``` + * \n \\n \\\n \\\\n \ + * 0100100010100101000 + * n \ \ n \ \ + * ``` + */ + uint64_t escaped; + /** + * Mask of escape characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 1001000101001010001 + * \ \ \ \ \ \ \ + * ``` + */ + uint64_t escape; + }; + + /** + * Get a mask of both escape and escaped characters (the characters following a backslash). + * + * @param potential_escape A mask of the character that can escape others (but could be + * escaped itself). e.g. block.eq('\\') + */ + simdjson_really_inline escaped_and_escape next(uint64_t backslash) noexcept { + +#if !SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT + if (!backslash) { return {next_escaped_without_backslashes(), 0}; } +#endif + + // | | Mask (shows characters instead of 1's) | Depth | Instructions | + // |--------------------------------|----------------------------------------|-------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | | | + // | | ` even odd even odd odd` | | | + // | potential_escape | ` \ \\\ \\\ \\\\ \\\\ \\\` | 1 | 1 (backslash & ~first_is_escaped) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 5 | 5 (next_escape_and_terminal_code()) + // | escaped | `\ \ n \ n \ \ \ \ \ ` X | 6 | 7 (escape_and_terminal_code ^ (potential_escape | first_is_escaped)) + // | escape | ` \ \ \ \ \ \ \ \ \ \` | 6 | 8 (escape_and_terminal_code & backslash) + // | first_is_escaped | `\ ` | 7 (*) | 9 (escape >> 63) () + // (*) this is not needed until the next iteration + uint64_t escape_and_terminal_code = next_escape_and_terminal_code(backslash & ~this->next_is_escaped); + uint64_t escaped = escape_and_terminal_code ^ (backslash | this->next_is_escaped); + uint64_t escape = escape_and_terminal_code & backslash; + this->next_is_escaped = escape >> 63; + return {escaped, escape}; + } + +private: + static constexpr const uint64_t ODD_BITS = 0xAAAAAAAAAAAAAAAAULL; + + simdjson_really_inline uint64_t next_escaped_without_backslashes() noexcept { + uint64_t 
escaped = this->next_is_escaped; + this->next_is_escaped = 0; + return escaped; + } + + /** + * Returns a mask of the next escape characters (masking out escaped backslashes), along with + * any non-backslash escape codes. + * + * \n \\n \\\n \\\\n returns: + * \n \ \ \n \ \ + * 11 100 1011 10100 + * + * You are expected to mask out the first bit yourself if the previous block had a trailing + * escape. + * + * & the result with potential_escape to get just the escape characters. + * ^ the result with (potential_escape | first_is_escaped) to get escaped characters. + */ + static simdjson_really_inline uint64_t next_escape_and_terminal_code(uint64_t potential_escape) noexcept { + // If we were to just shift and mask out any odd bits, we'd actually get a *half* right answer: + // any even-aligned backslash runs would be correct! Odd-aligned backslash runs would be + // inverted (\\\ would be 010 instead of 101). + // + // ``` + // string: | ____\\\\_\\\\_____ | + // maybe_escaped | ODD | \ \ \ \ | + // even-aligned ^^^ ^^^^ odd-aligned + // ``` + // + // Taking that into account, our basic strategy is: + // + // 1. Use subtraction to produce a mask with 1's for even-aligned runs and 0's for + // odd-aligned runs. + // 2. XOR all odd bits, which masks out the odd bits in even-aligned runs, and brings IN the + // odd bits in odd-aligned runs. + // 3. & with backslash to clean up any stray bits. 
+ // runs are set to 0, and then XORing with "odd": + // + // | | Mask (shows characters instead of 1's) | Instructions | + // |--------------------------------|----------------------------------------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | + // | | ` even odd even odd odd` | + // | maybe_escaped | ` n \\n \\n \\\_ \\\_ \\` X | 1 (potential_escape << 1) + // | maybe_escaped_and_odd | ` \n_ \\n _ \\\n_ _ \\\__ _\\\_ \\\` | 1 (maybe_escaped | odd) + // | even_series_codes_and_odd | ` n_\\\ _ n_ _\\\\ _ _ ` | 1 (maybe_escaped_and_odd - potential_escape) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 1 (^ odd) + // + + // Escaped characters are characters following an escape. + uint64_t maybe_escaped = potential_escape << 1; + + // To distinguish odd from even escape sequences, therefore, we turn on any *starting* + // escapes that are on an odd byte. (We actually bring in all odd bits, for speed.) + // - Odd runs of backslashes are 0000, and the code at the end ("n" in \n or \\n) is 1. + // - Odd runs of backslashes are 1111, and the code at the end ("n" in \n or \\n) is 0. + // - All other odd bytes are 1, and even bytes are 0. + uint64_t maybe_escaped_and_odd_bits = maybe_escaped | ODD_BITS; + uint64_t even_series_codes_and_odd_bits = maybe_escaped_and_odd_bits - potential_escape; + + // Now we flip all odd bytes back with xor. 
This: + // - Makes odd runs of backslashes go from 0000 to 1010 + // - Makes even runs of backslashes go from 1111 to 1010 + // - Sets actually-escaped codes to 1 (the n in \n and \\n: \n = 11, \\n = 100) + // - Resets all other bytes to 0 + return even_series_codes_and_odd_bits ^ ODD_BITS; + } +}; + +} // namespace stage1 +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_escape_scanner.h for lsx */ +/* including generic/stage1/json_string_scanner.h for lsx: #include */ +/* begin file generic/stage1/json_string_scanner.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { +namespace stage1 { + +struct json_string_block { + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_really_inline json_string_block(uint64_t escaped, uint64_t quote, uint64_t in_string) : + _escaped(escaped), _quote(quote), _in_string(in_string) {} + + // Escaped characters (characters following an escape() character) + simdjson_really_inline uint64_t escaped() const { return _escaped; } + // Real (non-backslashed) quotes + simdjson_really_inline uint64_t quote() const { return _quote; } + // Only characters inside the string (not including the quotes) + simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t 
non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + // Tail of string (everything except the start quote) + simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } + + // escaped characters (backslashed--does not include the hex characters after \u) + uint64_t _escaped; + // real quotes (non-escaped ones) + uint64_t _quote; + // string characters (includes start quote but not end quote) + uint64_t _in_string; +}; + +// Scans blocks for string characters, storing the state necessary to do so +class json_string_scanner { +public: + simdjson_really_inline json_string_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_really_inline error_code finish(); + +private: + // Scans for escape characters + json_escape_scanner escape_scanner{}; + // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). + uint64_t prev_in_string = 0ULL; +}; + +// +// Return a mask of all string characters plus end quotes. +// +// prev_escaped is overflow saying whether the next character is escaped. +// prev_in_string is overflow saying whether we're still in a string. +// +// Backslash sequences outside of quotes will be detected in stage 2. +// +simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + const uint64_t backslash = in.eq('\\'); + const uint64_t escaped = escape_scanner.next(backslash).escaped; + const uint64_t quote = in.eq('"') & ~escaped; + + // + // prefix_xor flips on bits inside the string (and flips off the end quote). + // + // Then we xor with prev_in_string: if we were in a string already, its effect is flipped + // (characters inside strings are outside, and characters outside strings are inside). 
+ // + const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; + + // + // Check if we're still in a string at the end of the box so the next block will know + // + prev_in_string = uint64_t(static_cast(in_string) >> 63); + + // Use ^ to turn the beginning quote off, and the end quote on. + + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_string_block(escaped, quote, in_string); +} + +simdjson_really_inline error_code json_string_scanner::finish() { + if (prev_in_string) { + return UNCLOSED_STRING; + } + return SUCCESS; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_string_scanner.h for lsx */ +/* including generic/stage1/utf8_lookup4_algorithm.h for lsx: #include */ +/* begin file generic/stage1/utf8_lookup4_algorithm.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { +namespace utf8_validation { + +using namespace simd; + + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { +// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) +// Bit 1 = Too Long (ASCII followed by continuation) +// Bit 2 = Overlong 3-byte +// Bit 4 = Surrogate +// Bit 5 = Overlong 2-byte +// Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ + constexpr const 
uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1<<6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 + ); + constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . 
+ const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, + CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 + ); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT + ); + return (byte_1_high & byte_1_low & byte_2_high); + } + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, + const simd8 prev_input, const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = must_be_2_3_continuation(prev2, prev3); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; + } + + // + // Return nonzero if there are incomplete multibyte characters at the end of the block: + // e.g. if there is a 4-byte character, but it's 3 bytes from the end. 
+ // + simdjson_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): + // ... 1111____ 111_____ 11______ +#if SIMDJSON_IMPLEMENTATION_ICELAKE + static const uint8_t max_array[64] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#else + static const uint8_t max_array[32] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#endif + const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); + return input.gt_bits(max_value); + } + + struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. + // + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes + // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + // The only problem that can happen at EOF is that a multibyte character is too short + // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // too large in the first of two bytes. 
+ simdjson_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; + } + + simdjson_inline void check_next_input(const simd8x64& input) { + if(simdjson_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 1) + ||(simd8x64::NUM_CHUNKS == 2) + || (simd8x64::NUM_CHUNKS == 4), + "We support one, two or four chunks per 64-byte block."); + SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; + } + } + // do not forget to call check_eof! + simdjson_inline error_code errors() { + return this->error.any_bits_set_anywhere() ? 
error_code::UTF8_ERROR : error_code::SUCCESS; + } + + }; // struct utf8_checker +} // namespace utf8_validation + +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H +/* end file generic/stage1/utf8_lookup4_algorithm.h for lsx */ +/* including generic/stage1/json_scanner.h for lsx: #include */ +/* begin file generic/stage1/json_scanner.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { +namespace stage1 { + +/** + * A block of scanned json, with information on operators and scalars. + * + * We seek to identify pseudo-structural characters. Anything that is inside + * a string must be omitted (hence & ~_string.string_tail()). + * Otherwise, pseudo-structural characters come in two forms. + * 1. We have the structural characters ([,],{,},:, comma). The + * term 'structural character' is from the JSON RFC. + * 2. We have the 'scalar pseudo-structural characters'. + * Scalars are quotes, and any character except structural characters and white space. + * + * To identify the scalar pseudo-structural characters, we must look at what comes + * before them: it must be a space, a quote or a structural characters. + * Starting with simdjson v0.3, we identify them by + * negation: we identify everything that is followed by a non-quote scalar, + * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. 
+ */ +struct json_block { +public: + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + + /** + * The start of structurals. + * In simdjson prior to v0.3, these were called the pseudo-structural characters. + **/ + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + /** All JSON whitespace (i.e. not in a string) */ + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + + // Helpers + + /** Whether the given characters are inside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + /** Whether the given characters are outside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + + // string and escape characters + json_string_block _string; + // whitespace, structural characters ('operators'), scalars + json_character_block _characters; + // whether the previous character was a scalar + uint64_t _follows_potential_nonquote_scalar; +private: + // Potential structurals (i.e. disregarding strings) + + /** + * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". + * They may reside inside a string. 
+ **/ + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + /** + * The start of non-operator runs, like 123, true and "abc". + * It main reside inside a string. + **/ + simdjson_inline uint64_t potential_scalar_start() const noexcept { + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space + // then we know that it is irrelevant structurally. + return _characters.scalar() & ~follows_potential_scalar(); + } + /** + * Whether the given character is immediately after a non-operator like 123, true. + * The characters following a quote are not included. + */ + simdjson_inline uint64_t follows_potential_scalar() const noexcept { + // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character + // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a + // white space. + // It is understood that within quoted region, anything at all could be marked (irrelevant). + return _follows_potential_nonquote_scalar; + } +}; + +/** + * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. + * + * The scanner starts by calculating two distinct things: + * - string characters (taking \" into account) + * - structural characters or 'operators' ([]{},:, comma) + * and scalars (runs of non-operators like 123, true and "abc") + * + * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: + * in particular, the operator/scalar bit will find plenty of things that are actually part of + * strings. When we're done, json_block will fuse the two together by masking out tokens that are + * part of a string. 
+ */ +class json_scanner { +public: + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_inline error_code finish(); + +private: + // Whether the last character of the previous iteration is part of a scalar token + // (anything except whitespace or a structural character/'operator'). + uint64_t prev_scalar = 0ULL; + json_string_scanner string_scanner{}; +}; + + +// +// Check if the current character immediately follows a matching character. +// +// For example, this checks for quotes with backslashes in front of them: +// +// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); +// +simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { + const uint64_t result = match << 1 | overflow; + overflow = match >> 63; + return result; +} + +simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { + json_string_block strings = string_scanner.next(in); + // identifies the white-space and the structural characters + json_character_block characters = json_character_block::classify(in); + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). + // + // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) + // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential + // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we + // may need to add an extra check when parsing strings. + // + // Performance: there are many ways to skin this cat. 
+ const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); + uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_block( + strings,// strings is a function-local object so either it moves or the copy is elided. + characters, + follows_nonquote_scalar + ); +} + +simdjson_inline error_code json_scanner::finish() { + return string_scanner.finish(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H +/* end file generic/stage1/json_scanner.h for lsx */ + +// All other declarations +/* including generic/stage1/find_next_document_index.h for lsx: #include */ +/* begin file generic/stage1/find_next_document_index.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { +namespace stage1 { + +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. 
We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! + if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document. 
+ switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } + return 0; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H +/* end file generic/stage1/find_next_document_index.h for lsx */ +/* including generic/stage1/json_minifier.h for lsx: #include */ +/* begin file generic/stage1/json_minifier.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace lsx { +namespace { +namespace stage1 { + +class json_minifier { +public: + template + static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; + +private: + simdjson_inline json_minifier(uint8_t *_dst) + : dst{_dst} + {} + template + simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + json_scanner scanner{}; + uint8_t *dst; 
+}; + +simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + uint64_t mask = block.whitespace(); + dst += in.compress(mask, dst); +} + +simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { + error_code error = scanner.finish(); + if (error) { dst_len = 0; return error; } + dst_len = dst - dst_start; + return SUCCESS; +} + +template<> +simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + simd::simd8x64 in_2(block_buf+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1); + this->next(in_2, block_2); + reader.advance(); +} + +template<> +simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + json_block block_1 = scanner.next(in_1); + this->next(block_buf, block_1); + reader.advance(); +} + +template +error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { + buf_block_reader reader(buf, len); + json_minifier minifier(dst); + + // Index the first n-1 blocks + while (reader.has_full_block()) { + minifier.step(reader.full_block(), reader); + } + + // Index the last (remainder) block, padded with spaces + uint8_t block[STEP_SIZE]; + size_t remaining_bytes = reader.get_remainder(block); + if (remaining_bytes > 0) { + // We do not want to write directly to the output stream. Rather, we write + // to a local buffer (for safety). + uint8_t out_block[STEP_SIZE]; + uint8_t * const guarded_dst{minifier.dst}; + minifier.dst = out_block; + minifier.step(block, reader); + size_t to_write = minifier.dst - out_block; + // In some cases, we could be enticed to consider the padded spaces + // as part of the string. This is fine as long as we do not write more + // than we consumed. 
+ if(to_write > remaining_bytes) { to_write = remaining_bytes; } + memcpy(guarded_dst, out_block, to_write); + minifier.dst = guarded_dst + to_write; + } + return minifier.finish(dst, dst_len); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H +/* end file generic/stage1/json_minifier.h for lsx */ +/* including generic/stage1/json_structural_indexer.h for lsx: #include */ +/* begin file generic/stage1/json_structural_indexer.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace lsx { +namespace { +namespace stage1 { + +class bit_indexer { +public: + uint32_t *tail; + + simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + +#if SIMDJSON_PREFER_REVERSE_BITS + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction. 
+ * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. However, + * it tends to minimize the total number of instructions which is + * beneficial. + */ + simdjson_inline void write_index(uint32_t idx, uint64_t& rev_bits, int i) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } +#else + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. + */ + + simdjson_inline void write_index(uint32_t idx, uint64_t& bits, int i) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } +#endif // SIMDJSON_PREFER_REVERSE_BITS + + template + simdjson_inline int write_indexes(uint32_t idx, uint64_t& bits) { + write_index(idx, bits, START); + SIMDJSON_IF_CONSTEXPR (N > 1) { + write_indexes<(N-1>0?START+1:START), (N-1>=0?N-1:1)>(idx, bits); + } + return START+N; + } + + template + simdjson_inline int write_indexes_stepped(uint32_t idx, uint64_t& bits, int cnt) { + write_indexes(idx, bits); + SIMDJSON_IF_CONSTEXPR ((START+STEP) < END) { + if (simdjson_unlikely((START+STEP) < cnt)) { + write_indexes_stepped<(START+STEP(idx, bits, cnt); + } + } + return ((END-START) % STEP) == 0 ? 
END : (END-START) - ((END-START) % STEP) + STEP; + } + + // flatten out values in 'bits' assuming that they are are to have values of idx + // plus their position in the bitvector, and store these indexes at + // base_ptr[base] incrementing base as we go + // will potentially store extra values beyond end of valid bits, so base_ptr + // needs to be large enough to handle this + // + // If the kernel sets SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER, then it + // will provide its own version of the code. +#ifdef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + simdjson_inline void write(uint32_t idx, uint64_t bits); +#else + simdjson_inline void write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; + + int cnt = static_cast(count_ones(bits)); + +#if SIMDJSON_PREFER_REVERSE_BITS + bits = reverse_bits(bits); +#endif +#ifdef SIMDJSON_STRUCTURAL_INDEXER_STEP + static constexpr const int STEP = SIMDJSON_STRUCTURAL_INDEXER_STEP; +#else + static constexpr const int STEP = 4; +#endif + static constexpr const int STEP_UNTIL = 24; + + write_indexes_stepped<0, STEP_UNTIL, STEP>(idx, bits, cnt); + SIMDJSON_IF_CONSTEXPR (STEP_UNTIL < 64) { + if (simdjson_unlikely(STEP_UNTIL < cnt)) { + for (int i=STEP_UNTIL; itail += cnt; + } +#endif // SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +}; + +class json_structural_indexer { +public: + /** + * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. + * + * @param partial Setting the partial parameter to true allows the find_structural_bits to + * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If + * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. 
+ */ + template + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; + +private: + simdjson_inline json_structural_indexer(uint32_t *structural_indexes); + template + simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + + json_scanner scanner{}; + utf8_checker checker{}; + bit_indexer indexer; + uint64_t prev_structurals = 0; + uint64_t unescaped_chars_error = 0; +}; + +simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} + +// Skip the last character if it is partial +simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { + if (simdjson_unlikely(len < 3)) { + switch (len) { + case 2: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + return len; + case 1: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + return len; + case 0: + return len; + } + } + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left + return len; +} + +// +// PERF NOTES: +// We pipe 2 inputs through these stages: +// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load +// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. +// 2. Scan the JSON for critical data: strings, scalars and operators. 
This is the critical path. +// The output of step 1 depends entirely on this information. These functions don't quite use +// up enough CPU: the second half of the functions is highly serial, only using 1 execution core +// at a time. The second input's scans has some dependency on the first ones finishing it, but +// they can make a lot of progress before they need that information. +// 3. Step 1 does not use enough capacity, so we run some extra stuff while we're waiting for that +// to finish: utf-8 checks and generating the output from the last iteration. +// +// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all +// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough +// workout. +// +template +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { + if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. + if(len == 0) { return UTF8_ERROR; } + } + buf_block_reader reader(buf, len); + json_structural_indexer indexer(parser.structural_indexes.get()); + + // Read all but the last block + while (reader.has_full_block()) { + indexer.step(reader.full_block(), reader); + } + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) 
+ uint8_t block[STEP_SIZE]; + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } + indexer.step(block, reader); + return indexer.finish(parser, reader.block_index(), len, partial); +} + +template<> +simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block); + simd::simd8x64 in_2(block+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1, reader.block_index()); + this->next(in_2, block_2, reader.block_index()+64); + reader.advance(); +} + +template<> +simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block); + json_block block_1 = scanner.next(in_1); + this->next(in_1, block_1, reader.block_index()); + reader.advance(); +} + +simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + uint64_t unescaped = in.lteq(0x1F); +#if SIMDJSON_UTF8VALIDATION + checker.check_next_input(in); +#endif + indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser + prev_structurals = block.structural_start(); + unescaped_chars_error |= block.non_quote_inside_string(unescaped); +} + +simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { + // Write out the final iteration's structurals + indexer.write(uint32_t(idx-64), prev_structurals); + error_code error = scanner.finish(); + // We deliberately break down the next expression so that it is + // human readable. + const bool should_we_exit = is_streaming(partial) ? 
+ ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING + : (error != SUCCESS); // if partial is false, we must have SUCCESS + const bool have_unclosed_string = (error == UNCLOSED_STRING); + if (simdjson_unlikely(should_we_exit)) { return error; } + + if (unescaped_chars_error) { + return UNESCAPED_CHARS; + } + parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); + /*** + * The On Demand API requires special padding. + * + * This is related to https://github.com/simdjson/simdjson/issues/906 + * Basically, we want to make sure that if the parsing continues beyond the last (valid) + * structural character, it quickly stops. + * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. + * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing + * continues, then it must be [,] or }. + * Suppose it is ] or }. We backtrack to the first character, what could it be that would + * not trigger an error? It could be ] or } but no, because you can't start a document that way. + * It can't be a comma, a colon or any simple value. So the only way we could continue is + * if the repeated character is [. But if so, the document must start with [. But if the document + * starts with [, it should end with ]. If we enforce that rule, then we would get + * ][[ which is invalid. 
+ * + * This is illustrated with the test array_iterate_unclosed_error() on the following input: + * R"({ "a": [,,)" + **/ + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final + parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); + parser.structural_indexes[parser.n_structural_indexes + 2] = 0; + parser.next_structural_index = 0; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + return EMPTY; + } + if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { + return UNEXPECTED_ERROR; + } + if (partial == stage1_mode::streaming_partial) { + // If we have an unclosed string, then the last structural + // will be the quote and we want to make sure to omit it. + if(have_unclosed_string) { + parser.n_structural_indexes--; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } + } + // We truncate the input to the end of the last complete document (or zero). + auto new_structural_indexes = find_next_document_index(parser); + if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } + + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). 
+ // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. + return EMPTY; + } + } + checker.check_eof(); + return checker.errors(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +// Clear CUSTOM_BIT_INDEXER so other implementations can set it if they need to. 
+#undef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H +/* end file generic/stage1/json_structural_indexer.h for lsx */ +/* including generic/stage1/utf8_validator.h for lsx: #include */ +/* begin file generic/stage1/utf8_validator.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { +namespace stage1 { + +/** + * Validates that the string is actual UTF-8. + */ +template +bool generic_validate_utf8(const uint8_t * input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return c.errors() == error_code::SUCCESS; +} + +bool generic_validate_utf8(const char * input, size_t length) { + return generic_validate_utf8(reinterpret_cast(input),length); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H +/* end file generic/stage1/utf8_validator.h for lsx */ +/* end file generic/stage1/amalgamated.h for lsx */ +/* including generic/stage2/amalgamated.h for lsx: #include */ +/* begin file generic/stage2/amalgamated.h for lsx */ +// Stuff other things depend on +/* including generic/stage2/base.h for lsx: #include */ +/* 
begin file generic/stage2/base.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { +namespace stage2 { + +class json_iterator; +class structural_iterator; +struct tape_builder; +struct tape_writer; + +} // namespace stage2 +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_BASE_H +/* end file generic/stage2/base.h for lsx */ +/* including generic/stage2/tape_writer.h for lsx: #include */ +/* begin file generic/stage2/tape_writer.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace lsx { +namespace { +namespace stage2 { + +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; + + /** Write a signed 64-bit value to tape. */ + simdjson_inline void append_s64(int64_t value) noexcept; + + /** Write an unsigned 64-bit value to tape. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + + /** Write a double value to tape. */ + simdjson_inline void append_double(double value) noexcept; + + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). + */ + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; + + /** + * Skip the current tape entry without writing. 
+ * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_inline void skip() noexcept; + + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_inline void skip_large_integer() noexcept; + + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_inline void skip_double() noexcept; + + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. + */ + template + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct tape_writer + +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} + +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} + +/** Write a double value to tape. 
*/ +simdjson_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} + +simdjson_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} + +simdjson_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} + +template +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} + +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} + +} // namespace stage2 +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H +/* end file generic/stage2/tape_writer.h for lsx */ +/* including generic/stage2/logger.h for lsx: #include */ +/* begin file generic/stage2/logger.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + + +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! 
+namespace simdjson { +namespace lsx { +namespace { +namespace logger { + + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; + + static int log_depth; // Not threadsafe. Log only. + + // Helper to turn unprintable or newline characters into spaces + static simdjson_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } + } + + // Print the header and set up log_start + static simdjson_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } + } + + simdjson_unused static simdjson_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } + + // Logs a single line from the stage 2 DOM parser + template + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? 
nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i */ +/* begin file generic/stage2/json_iterator.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { +namespace stage2 { + +class json_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + uint32_t depth{0}; + + /** + * Walk the JSON document. + * + * The visitor receives callbacks when values are encountered. All callbacks pass the iterator as + * the first parameter; some callbacks have other parameters as well: + * + * - visit_document_start() - at the beginning. + * - visit_document_end() - at the end (if things were successful). + * + * - visit_array_start() - at the start `[` of a non-empty array. + * - visit_array_end() - at the end `]` of a non-empty array. + * - visit_empty_array() - when an empty array is encountered. + * + * - visit_object_start() - at the start `{` of a non-empty object. + * - visit_object_end() - at the end `}` of a non-empty object. + * - visit_empty_object() - when an empty object is encountered. 
+ * - visit_key(const uint8_t *key) - when a key in an object field is encountered. key is + * guaranteed to point at the first quote of the string (`"key"`). + * - visit_primitive(const uint8_t *value) - when a value is a string, number, boolean or null. + * - visit_root_primitive(iter, uint8_t *value) - when the top-level value is a string, number, boolean or null. + * + * - increment_count(iter) - each time a value is found in an array or object. + */ + template + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; + + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + + /** + * Look at the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; + + /** + * Log that a value has been found. + * + * Define SIMDJSON_VERBOSE_LOGGING to a non-zero value (it drives LOG_ENABLED in logger.h) to see logging. 
+ */ + simdjson_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Define SIMDJSON_VERBOSE_LOGGING to a non-zero value (it drives LOG_ENABLED in logger.h) to see logging. + */ + simdjson_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Define SIMDJSON_VERBOSE_LOGGING to a non-zero value (it drives LOG_ENABLED in logger.h) to see logging. + */ + simdjson_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Define SIMDJSON_VERBOSE_LOGGING to a non-zero value (it drives LOG_ENABLED in logger.h) to see logging. + */ + simdjson_inline void log_error(const char *error) const noexcept; + + template + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); + + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); + + // + // Read first value + // + { + auto value = advance(); + + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. 
See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } + } + goto document_end; + +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); + + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { 
log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } + +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; + +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); + +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + } + +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); + + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); + + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters 
at the end of the JSON!"); + return TAPE_ERROR; + } + + return SUCCESS; + +} // walk_document() + +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { +} + +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; +} +simdjson_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); +} + +simdjson_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { + return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} + +simdjson_inline void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} + +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} + +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} + +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, 
value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + // Use the fact that most scalars are going to be either strings or numbers. NOTE(review): `*value - '0'` is evaluated as int, so bytes below '0' also pass the `< 10` test and are handed to visit_number - confirm this is intended. + if(*value == '"') { + return visitor.visit_string(*this, value); + } else if (((*value - '0') < 10) || (*value == '-')) { + return visitor.visit_number(*this, value); + } + // true, false, null are uncommon. + switch (*value) { + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + default: + log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } +} + +} // namespace stage2 +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H +/* end file generic/stage2/json_iterator.h for lsx */ +/* including generic/stage2/stringparsing.h for lsx: #include */ +/* begin file generic/stage2/stringparsing.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses +// It is 
intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace lsx { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr, bool allow_replacement) { + // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) + constexpr uint32_t substitution_code_point = 0xfffd; + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this to inside the + // multilingual 
plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + + } + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. 
+ if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +// handle a unicode codepoint using the wobbly convention +// https://simonsapin.github.io/wtf-8/ +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // It is not ideal that this function is nearly identical to handle_unicode_codepoint. + // + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this to inside the + // multilingual plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + uint32_t low_bit = code_point_2 - 0xdc00; + if ((low_bit >> 10) == 0) { + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + } + } + + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. 
There + * must be an unescaped quote terminating the string. It returns the final output + * position as a pointer. In case of error (e.g., the string has bad escaped codes), + * then nullptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backslash is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. 
*/ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } +} + +simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) { + // It is not ideal that this function is nearly identical to parse_string. + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backslash is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint_wobbly(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. 
*/ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H +/* end file generic/stage2/stringparsing.h for lsx */ +/* including generic/stage2/structural_iterator.h for lsx: #include */ +/* begin file generic/stage2/structural_iterator.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { +namespace stage2 { + +class structural_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + + // Start a structural + simdjson_inline structural_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { + } + // Get the buffer position of the current structural character + simdjson_inline const uint8_t* current() { + return &buf[*(next_structural-1)]; + } + // Get the current structural character + simdjson_inline char current_char() { + return buf[*(next_structural-1)]; + } + // Get the next structural character without advancing + simdjson_inline char peek_next_char() { + return buf[*next_structural]; + } + simdjson_inline const uint8_t* peek() { + return &buf[*next_structural]; + } + simdjson_inline const uint8_t* advance() { + return &buf[*(next_structural++)]; + } + simdjson_inline char advance_char() { + return 
buf[*(next_structural++)]; + } + simdjson_inline size_t remaining_len() { + return dom_parser.len - *(next_structural-1); + } + + simdjson_inline bool at_end() { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; + } + simdjson_inline bool at_beginning() { + return next_structural == dom_parser.structural_indexes.get(); + } +}; + +} // namespace stage2 +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H +/* end file generic/stage2/structural_iterator.h for lsx */ +/* including generic/stage2/tape_builder.h for lsx: #include */ +/* begin file generic/stage2/tape_builder.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace lsx { +namespace { +namespace stage2 { + +struct tape_builder { + template + simdjson_warn_unused static simdjson_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; + + /** Called when a non-empty document starts. */ + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. 
*/ + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; + + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * visit_primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. + */ + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; + + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null). + * + * This is separate from visit_primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. 
+ */ + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + /** Called each time a new field or element in an array or object is found. 
*/ + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; + + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; + + simdjson_inline tape_builder(dom::document &doc) noexcept; + + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; +}; // struct tape_builder + +template +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { + constexpr uint32_t start_tape_index = 0; + 
tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} + +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst, false); // We do not allow replacement when the escape characters are invalid. + if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; + } + on_end_string(dst); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { + // + // We need to make a copy to make sure that the string is space terminated. + // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. 
However, we have no control at this stage + // on how the padding was done. What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. + // + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code 
tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +// private: + +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} + +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. 
+} + +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? 
It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} + +} // namespace stage2 +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H +/* end file generic/stage2/tape_builder.h for lsx */ +/* end file generic/stage2/amalgamated.h for lsx */ + +// +// Stage 1 +// +namespace simdjson { +namespace lsx { + +simdjson_warn_unused error_code implementation::create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr& dst +) const noexcept { + dst.reset( new (std::nothrow) dom_parser_implementation() ); + if (!dst) { return MEMALLOC; } + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; + return SUCCESS; +} + +namespace { + +using namespace simd; + +simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { + // Inspired by haswell. + // LSX use low 5 bits as index. For the 6 operators (:,[]{}), the unique-5bits is [6:2]. 
+ // The ASCII white-space and operators have these values: (char, hex, unique-5bits) + // (' ', 20, 00000) ('\t', 09, 01001) ('\n', 0A, 01010) ('\r', 0D, 01101) + // (',', 2C, 01011) (':', 3A, 01110) ('[', 5B, 10110) ('{', 7B, 11110) (']', 5D, 10111) ('}', 7D, 11111) + const simd8 ws_table = simd8::repeat_16( + ' ', 0, 0, 0, 0, 0, 0, 0, 0, '\t', '\n', 0, 0, '\r', 0, 0 + ); + const simd8 op_table_lo = simd8::repeat_16( + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ',', 0, 0, ':', 0 + ); + const simd8 op_table_hi = simd8::repeat_16( + 0, 0, 0, 0, 0, 0, '[', ']', 0, 0, 0, 0, 0, 0, '{', '}' + ); + uint64_t ws = in.eq({ + in.chunks[0].lookup_16(ws_table), + in.chunks[1].lookup_16(ws_table), + in.chunks[2].lookup_16(ws_table), + in.chunks[3].lookup_16(ws_table) + }); + uint64_t op = in.eq({ + __lsx_vshuf_b(op_table_hi, op_table_lo, in.chunks[0].shr<2>()), + __lsx_vshuf_b(op_table_hi, op_table_lo, in.chunks[1].shr<2>()), + __lsx_vshuf_b(op_table_hi, op_table_lo, in.chunks[2].shr<2>()), + __lsx_vshuf_b(op_table_hi, op_table_lo, in.chunks[3].shr<2>()) + }); + + return { ws, op }; +} + +simdjson_inline bool is_ascii(const simd8x64& input) { + return input.reduce_or().is_ascii(); +} + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0xe0u-0x80); // Only 111_____ will be >= 0x80 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-0x80); // Only 1111____ will be >= 0x80 + return is_third_byte | is_fourth_byte; +} + +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +// +// Stage 2 +// + +// +// Implementation-specific overrides +// +namespace simdjson { +namespace lsx { + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + return lsx::stage1::json_minifier::minify<64>(buf, len, dst, dst_len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, 
stage1_mode streaming) noexcept { + this->buf = _buf; + this->len = _len; + return lsx::stage1::json_structural_indexer::index<64>(buf, len, *this, streaming); +} + +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return lsx::stage1::generic_validate_utf8(buf,len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept { + return lsx::stringparsing::parse_string(src, dst, allow_replacement); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept { + return lsx::stringparsing::parse_wobbly_string(src, dst); +} + +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { return error; } + return stage2(_doc); +} + +} // namespace lsx +} // namespace simdjson + +/* including simdjson/lsx/end.h: #include */ +/* begin file simdjson/lsx/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "lsx" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/lsx/end.h */ + +#endif // SIMDJSON_SRC_LSX_CPP +/* end file lsx.cpp */ +#endif +#if SIMDJSON_IMPLEMENTATION_LASX +/* including lasx.cpp: #include */ +/* 
begin file lasx.cpp */ +#ifndef SIMDJSON_SRC_LASX_CPP +#define SIMDJSON_SRC_LASX_CPP + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* including simdjson/lasx.h: #include */ +/* begin file simdjson/lasx.h */ +#ifndef SIMDJSON_LASX_H +#define SIMDJSON_LASX_H + +/* including simdjson/lasx/begin.h: #include "simdjson/lasx/begin.h" */ +/* begin file simdjson/lasx/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "lasx" */ +#define SIMDJSON_IMPLEMENTATION lasx +/* including simdjson/lasx/base.h: #include "simdjson/lasx/base.h" */ +/* begin file simdjson/lasx/base.h */ +#ifndef SIMDJSON_LASX_BASE_H +#define SIMDJSON_LASX_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Implementation for LASX. + */ +namespace lasx { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_BASE_H +/* end file simdjson/lasx/base.h */ +/* including simdjson/lasx/intrinsics.h: #include "simdjson/lasx/intrinsics.h" */ +/* begin file simdjson/lasx/intrinsics.h */ +#ifndef SIMDJSON_LASX_INTRINSICS_H +#define SIMDJSON_LASX_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. 
+#include + +static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for LoongArch ASX"); + +#endif // SIMDJSON_LASX_INTRINSICS_H +/* end file simdjson/lasx/intrinsics.h */ +/* including simdjson/lasx/bitmanipulation.h: #include "simdjson/lasx/bitmanipulation.h" */ +/* begin file simdjson/lasx/bitmanipulation.h */ +#ifndef SIMDJSON_LASX_BITMANIPULATION_H +#define SIMDJSON_LASX_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/bitmask.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. 
+// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { + return __builtin_ctzll(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return __builtin_clzll(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int count_ones(uint64_t input_num) { + return __lasx_xvpickve2gr_w(__lasx_xvpcnt_d(__m256i(v4u64{input_num, 0, 0, 0})), 0); +} + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +} + +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_BITMANIPULATION_H +/* end file simdjson/lasx/bitmanipulation.h */ +/* including simdjson/lasx/bitmask.h: #include "simdjson/lasx/bitmask.h" */ +/* begin file simdjson/lasx/bitmask.h */ +#ifndef SIMDJSON_LASX_BITMASK_H +#define SIMDJSON_LASX_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. 
+// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif +/* end file simdjson/lasx/bitmask.h */ +/* including simdjson/lasx/numberparsing_defs.h: #include "simdjson/lasx/numberparsing_defs.h" */ +/* begin file simdjson/lasx/numberparsing_defs.h */ +#ifndef SIMDJSON_LASX_NUMBERPARSING_DEFS_H +#define SIMDJSON_LASX_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace lasx { +namespace numberparsing { + +// we don't have appropriate instructions, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); + return answer; +} + +} // namespace numberparsing +} // namespace lasx +} // namespace simdjson + 
+#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_LASX_NUMBERPARSING_DEFS_H +/* end file simdjson/lasx/numberparsing_defs.h */ +/* including simdjson/lasx/simd.h: #include "simdjson/lasx/simd.h" */ +/* begin file simdjson/lasx/simd.h */ +#ifndef SIMDJSON_LASX_SIMD_H +#define SIMDJSON_LASX_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m256i value; + + // Zero constructor + simdjson_inline base() : value{__m256i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m256i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m256i&() const { return this->value; } + simdjson_inline operator __m256i&() { return this->value; } + simdjson_inline operator const v32i8&() const { return (v32i8&)this->value; } + simdjson_inline operator v32i8&() { return (v32i8&)this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return __lasx_xvor_v(*this, other); } + simdjson_inline Child operator&(const Child other) const { return __lasx_xvand_v(*this, other); } + simdjson_inline Child operator^(const Child other) const { return __lasx_xvxor_v(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return __lasx_xvandn_v(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& 
operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. + template + struct simd8; + + template> + struct base8: base> { + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m256i _value) : base>(_value) {} + + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return __lasx_xvseq_b(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + __m256i hi = __lasx_xvbsll_v(*this, N); + __m256i lo = __lasx_xvbsrl_v(*this, 16 - N); + __m256i tmp = __lasx_xvbsrl_v(prev_chunk, 16 - N); + lo = __lasx_xvpermi_q(lo, tmp, 0x21); + return __lasx_xvor_v(hi, lo); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return __lasx_xvreplgr2vr_b(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m256i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { + __m256i mask = __lasx_xvmskltz_b(*this); + return (__lasx_xvpickve2gr_w(mask, 4) << 16) | (__lasx_xvpickve2gr_w(mask, 0)); + } + simdjson_inline bool any() const { + __m256i v = __lasx_xvmsknz_b(*this); + return (0 == __lasx_xvpickve2gr_w(v, 0)) && (0 == __lasx_xvpickve2gr_w(v, 4)); + } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { + return __lasx_xvreplgr2vr_b(_value); + } + static simdjson_inline simd8 zero() { return __lasx_xvldi(0); } + static 
simdjson_inline simd8 load(const T values[32]) { + return __lasx_xvld(reinterpret_cast(values), 0); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m256i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[32]) const { + return __lasx_xvst(*this, reinterpret_cast<__m256i *>(dst), 0); + } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return __lasx_xvadd_b(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return __lasx_xvsub_b(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return __lasx_xvshuf_b(lookup_table, lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. 
+ template + simdjson_inline void compress(uint32_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by haswell + // lasx do it in 4 steps, first 8 bytes and then second 8 bytes... + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second significant 8 bits + uint8_t mask3 = uint8_t(mask >> 16); // ... + uint8_t mask4 = uint8_t(mask >> 24); // ... + // next line just loads the 64-bit values thintable_epi8[mask{1,2,3,4}] + // into a 256-bit register. + __m256i shufmask = {int64_t(thintable_epi8[mask1]), int64_t(thintable_epi8[mask2]) + 0x0808080808080808, int64_t(thintable_epi8[mask3]), int64_t(thintable_epi8[mask4]) + 0x0808080808080808}; + // this is the version "nearly pruned" + __m256i pruned = __lasx_xvshuf_b(*this, *this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + int pop2 = BitsSetTable256mul2[mask2]; + int pop3 = BitsSetTable256mul2[mask3]; + + // then load the corresponding mask + __m256i masklo = __lasx_xvldx(reinterpret_cast(reinterpret_cast(pshufb_combine_table)), pop1 * 8); + __m256i maskhi = __lasx_xvldx(reinterpret_cast(reinterpret_cast(pshufb_combine_table)), pop3 * 8); + __m256i compactmask = __lasx_xvpermi_q(maskhi, masklo, 0x20); + __m256i answer = __lasx_xvshuf_b(pruned, pruned, compactmask); + __lasx_xvst(answer, reinterpret_cast(output), 0); + uint64_t value3 = __lasx_xvpickve2gr_du(answer, 2); + uint64_t value4 = __lasx_xvpickve2gr_du(answer, 3); + uint64_t *pos = reinterpret_cast(reinterpret_cast(output) + 16 - (pop1 + pop2) / 2); + pos[0] = value3; + pos[1] = value4; + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L 
replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 + ) : simd8({ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + }) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return __lasx_xvmax_b(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lasx_xvmin_b(*this, other); } + 
simdjson_inline simd8 operator>(const simd8 other) const { return __lasx_xvslt_b(other, *this); } + simdjson_inline simd8 operator<(const simd8 other) const { return __lasx_xvslt_b(*this, other); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 + ) : simd8(__m256i(v32u8{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + })) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return __lasx_xvsadd_bu(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return __lasx_xvssub_bu(*this, other); } + + // Order-specific operations + 
simdjson_inline simd8 max_val(const simd8 other) const { return __lasx_xvmax_bu(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lasx_xvmin_bu(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { + __m256i mask = __lasx_xvmskltz_b(*this); + return (0 == __lasx_xvpickve2gr_w(mask, 0)) && (0 == __lasx_xvpickve2gr_w(mask, 4)); + } + simdjson_inline bool bits_not_set_anywhere() const { + __m256i v = __lasx_xvmsknz_b(*this); + return (0 == __lasx_xvpickve2gr_w(v, 0)) && (0 == __lasx_xvpickve2gr_w(v, 4)); + } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { + __m256i v = __lasx_xvmsknz_b(__lasx_xvand_v(*this, bits)); + return (0 == __lasx_xvpickve2gr_w(v, 0)) && (0 == __lasx_xvpickve2gr_w(v, 4)); + } + 
simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(__lasx_xvsrli_b(*this, N)); } + template + simdjson_inline simd8 shl() const { return simd8(__lasx_xvslli_b(*this, N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 2, "LASX kernel should use two registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint32_t mask1 = uint32_t(mask); + uint32_t mask2 = uint32_t(mask >> 32); + __m256i zcnt = __lasx_xvpcnt_w(__m256i(v4u64{~mask, 0, 0, 0})); + uint64_t zcnt1 = __lasx_xvpickve2gr_wu(zcnt, 0); + uint64_t zcnt2 = __lasx_xvpickve2gr_wu(zcnt, 1); + // There should be a critical value which processes in scaler is faster. 
+ if (zcnt1) + this->chunks[0].compress(mask1, output); + if (zcnt2) + this->chunks[1].compress(mask2, output + zcnt1); + return zcnt1 + zcnt2; + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + } + + simdjson_inline uint64_t to_bitmask() const { + __m256i mask0 = __lasx_xvmskltz_b(this->chunks[0]); + __m256i mask1 = __lasx_xvmskltz_b(this->chunks[1]); + __m256i mask_tmp = __lasx_xvpickve_w(mask0, 4); + __m256i tmp = __lasx_xvpickve_w(mask1, 4); + mask0 = __lasx_xvinsve0_w(mask0, mask1, 1); + mask_tmp = __lasx_xvinsve0_w(mask_tmp, tmp, 1); + return __lasx_xvpickve2gr_du(__lasx_xvpackev_h(mask_tmp, mask0), 0); + } + + simdjson_inline simd8 reduce_or() const { + return this->chunks[0] | this->chunks[1]; + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_SIMD_H +/* end file simdjson/lasx/simd.h */ +/* including simdjson/lasx/stringparsing_defs.h: #include "simdjson/lasx/stringparsing_defs.h" */ +/* begin file simdjson/lasx/stringparsing_defs.h */ +#ifndef SIMDJSON_LASX_STRINGPARSING_DEFS_H +#define SIMDJSON_LASX_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/lasx/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + return { + static_cast((v == '\\').to_bitmask()), // bs_bits + static_cast((v == '"').to_bitmask()), // quote_bits + }; +} + +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_STRINGPARSING_DEFS_H +/* end file simdjson/lasx/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/lasx/begin.h */ +/* including simdjson/generic/amalgamated.h for lasx: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for lasx */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! 
+#endif
+
+/* including simdjson/generic/base.h for lasx: #include "simdjson/generic/base.h" */
+/* begin file simdjson/generic/base.h for lasx */
+// NOTE(review): the matching "#define SIMDJSON_GENERIC_BASE_H" only appears
+// inside an "amalgamation skipped" comment below, so this guard is not defined
+// by this section -- presumably intentional for the amalgamated build; confirm.
+#ifndef SIMDJSON_GENERIC_BASE_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */
+/* amalgamation skipped (editor-only): #include "simdjson/base.h" */
+/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */
+/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */
+/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */
+/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */
+/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */
+/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */
+/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */
+/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */
+/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */
+/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */
+/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */
+/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */
+/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */
+/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */
+/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */
+/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */
+/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */
+/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */
+/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */
+/* amalgamation skipped (editor-only): #else */
+/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */
+/* amalgamation skipped (editor-only): #endif */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace lasx {
+
+// Forward declarations only; both types are declared here so that later
+// sections can name them before their full definitions are seen.
+struct open_container;
+class dom_parser_implementation;
+
+/**
+ * The type of a JSON number.
+ *
+ * Enumerators are numbered from 1 upwards (floating_point_number is pinned to
+ * 1 and the rest follow sequentially), so 0 is not a valid number_type.
+ */
+enum class number_type {
+    floating_point_number=1, /// a binary64 (IEEE 754 double precision) number
+    signed_integer,          /// a signed integer that fits in a 64-bit word using two's complement
+    unsigned_integer,        /// a positive integer greater than or equal to 1<<63, i.e. too large for a signed 64-bit word
+    big_integer              /// an integer that does not fit in a 64-bit word at all
+};
+
+} // namespace lasx
+} // namespace simdjson
+
+#endif // SIMDJSON_GENERIC_BASE_H
+/* end file simdjson/generic/base.h for lasx */
+/* including simdjson/generic/jsoncharutils.h for lasx: #include "simdjson/generic/jsoncharutils.h" */
+/* begin file simdjson/generic/jsoncharutils.h for lasx */
+#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace lasx {
+namespace {
+namespace jsoncharutils {
+
+// return non-zero if not a structural or whitespace char
+// zero otherwise
+simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) {
+  return internal::structural_or_whitespace_negated[c];
+}
+
+// Looks the byte up in internal::structural_or_whitespace.
+// NOTE(review): presumably non-zero exactly when c is a structural or
+// whitespace character (the mirror image of the lookup above) -- confirm
+// against the table definition.
+simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) {
+  return internal::structural_or_whitespace[c];
+}
+
+// returns a value with the high 16 bits set if not valid
+// otherwise returns the conversion of the 4 hex digits at src into the bottom
+// 16 bits of the 32-bit return register
+//
+// Reads exactly four bytes, src[0]..src[3], and performs no validation of its
+// own: each byte indexes internal::digit_to_val32 at a per-position base
+// offset (630, 420, 210, 0) and the four lookups are OR-ed together.
+// NOTE(review): the offsets suggest four 210-entry sub-tables, one per digit
+// position -- confirm against the table definition.
+//
+// see
+// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/
+static inline uint32_t hex_to_u32_nocheck(
+    const uint8_t *src) { // strictly speaking, static inline is a C-ism
+  const uint32_t first  = internal::digit_to_val32[630 + src[0]];
+  const uint32_t second = internal::digit_to_val32[420 + src[1]];
+  const uint32_t third  = internal::digit_to_val32[210 + src[2]];
+  const uint32_t fourth = internal::digit_to_val32[0 + src[3]];
+  return first | second | third | fourth;
+}
+
+// given a code point cp, writes to c
+// the utf-8 code, outputting the length in
+// bytes, if the length is zero, the code point
+// is invalid
+//
+// Returns 1, 2, 3 or 4 (the number of bytes stored at c[0..]) or 0 when
+// cp > 0x10FFFF, in which case nothing is written. The caller must provide
+// room for up to four bytes at c.
+//
+// This can possibly be made faster using pdep
+// and clz and table lookups, but JSON documents
+// have few escaped code points, and the following
+// function looks cheap.
+//
+// Note: we assume that surrogates are treated separately, so a value in
+// 0xD800..0xDFFF is NOT rejected here; it is encoded as an ordinary
+// three-byte sequence.
+//
+simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
+  if (cp <= 0x7F) { // one byte: plain ASCII
+    c[0] = uint8_t(cp);
+    return 1;
+  }
+  if (cp <= 0x7FF) { // two bytes: lead byte 192 + top bits, one continuation byte
+    c[0] = uint8_t((cp >> 6) + 192);
+    c[1] = uint8_t((cp & 63) + 128);
+    return 2;
+  }
+  if (cp <= 0xFFFF) { // three bytes (surrogates included, see note above)
+    c[0] = uint8_t((cp >> 12) + 224);
+    c[1] = uint8_t(((cp >> 6) & 63) + 128);
+    c[2] = uint8_t((cp & 63) + 128);
+    return 3;
+  }
+  if (cp <= 0x10FFFF) { // four bytes; the bound check is redundant if the
+                        // caller already knows the code point is valid
+    c[0] = uint8_t((cp >> 18) + 240);
+    c[1] = uint8_t(((cp >> 12) & 63) + 128);
+    c[2] = uint8_t(((cp >> 6) & 63) + 128);
+    c[3] = uint8_t((cp & 63) + 128);
+    return 4;
+  }
+  // the code point was too large: report failure with a zero length
+  return 0;
+}
+
+#if SIMDJSON_IS_32BITS // _umul128 for x86, arm
+// this is a slow emulation routine for 32-bit
+//
+// 32 x 32 -> 64 bit multiplication; one operand is widened first so the
+// product cannot wrap.
+static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) {
+  return uint64_t(x) * y;
+}
+// 64 x 64 -> 128 bit multiplication built from four 32 x 32 partial products.
+// Writing ab = a * 2^32 + b and cd = c * 2^32 + d, the low 64 bits of ab * cd
+// are returned and the high 64 bits are stored through hi.
+static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) {
+  const uint32_t a = uint32_t(ab >> 32);
+  const uint32_t b = uint32_t(ab);
+  const uint32_t c = uint32_t(cd >> 32);
+  const uint32_t d = uint32_t(cd);
+  const uint64_t ad = __emulu(a, d);
+  const uint64_t bd = __emulu(b, d);
+  // sum of the two middle partial products; it can wrap, so record the carry
+  const uint64_t adbc = ad + __emulu(b, c);
+  const uint64_t adbc_carry = (adbc < ad) ? 1 : 0;
+  const uint64_t lo = bd + (adbc << 32);
+  // (lo < bd) is the carry out of the low word
+  *hi = __emulu(a, c) + (adbc >> 32) + (adbc_carry << 32) + ((lo < bd) ? 1 : 0);
+  return lo;
+}
+#endif
+
+} // namespace jsoncharutils
+} // unnamed namespace
+} // namespace lasx
+} // namespace simdjson
+
+#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H
+/* end file simdjson/generic/jsoncharutils.h for lasx */
+/* including simdjson/generic/atomparsing.h for lasx: #include "simdjson/generic/atomparsing.h" */
+/* begin file simdjson/generic/atomparsing.h for lasx */
+#ifndef SIMDJSON_GENERIC_ATOMPARSING_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+#include <cstring> // std::memcpy
+
+namespace simdjson {
+namespace lasx {
+namespace {
+/// @private
+namespace atomparsing {
+
+// The string_to_uint32 is exclusively used to map literal strings to 32-bit values.
+// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot
+// be certain that the character pointer will be properly aligned.
+// You might think that using memcpy makes this function expensive, but you'd be wrong.
+// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false");
+// to the compile-time constant 1936482662.
+//
+// Exactly sizeof(uint32_t) bytes are copied from str, so only the first four
+// characters take part in the result and str must have at least four readable bytes.
+simdjson_inline uint32_t string_to_uint32(const char* str) {
+  uint32_t val;
+  std::memcpy(&val, str, sizeof(uint32_t));
+  return val;
+}
+
+
+// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive.
+// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about.
+simdjson_warn_unused
+simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) {
+  // Result is zero when the four bytes at src equal the four bytes at atom,
+  // and the non-zero XOR of the two 32-bit words otherwise.
+  static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes");
+  uint32_t word; // filled through memcpy: an unaligned 32-bit load via a pointer cast is undefined in C/C++
+  std::memcpy(&word, src, sizeof(uint32_t));
+  const uint32_t expected = string_to_uint32(atom);
+  return word ^ expected;
+}
+
+// True when src starts with "true" and src[4] is accepted by
+// jsoncharutils::is_not_structural_or_whitespace (i.e. that lookup yields 0).
+// Both checks are always evaluated and OR-ed together, so there is no branch;
+// five bytes are read from src.
+simdjson_warn_unused
+simdjson_inline bool is_valid_true_atom(const uint8_t *src) {
+  const uint32_t mismatch = str4ncmp(src, "true");
+  const uint32_t trailing = jsoncharutils::is_not_structural_or_whitespace(src[4]);
+  return (mismatch | trailing) == 0;
+}
+
+// Length-aware overload: with fewer than 4 bytes the answer is false, with
+// exactly 4 only the letters are compared (no terminator check), and with
+// more than 4 the unbounded overload decides.
+// NOTE(review): len is presumably the number of bytes remaining at src -- confirm with callers.
+simdjson_warn_unused
+simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) {
+  if (len < 4) { return false; }
+  if (len == 4) { return str4ncmp(src, "true") == 0; }
+  return is_valid_true_atom(src);
+}
+
+// True when src[1..4] is "alse" and src[5] is accepted by
+// jsoncharutils::is_not_structural_or_whitespace. src[0] is not examined here.
+// NOTE(review): presumably the caller has already dispatched on the leading 'f' -- confirm.
+simdjson_warn_unused
+simdjson_inline bool is_valid_false_atom(const uint8_t *src) {
+  const uint32_t mismatch = str4ncmp(src+1, "alse");
+  const uint32_t trailing = jsoncharutils::is_not_structural_or_whitespace(src[5]);
+  return (mismatch | trailing) == 0;
+}
+
+// Length-aware overload for "false": needs at least 5 bytes; with exactly 5
+// only "alse" at src+1 is compared, with more the unbounded overload decides.
+simdjson_warn_unused
+simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) {
+  if (len < 5) { return false; }
+  if (len == 5) { return str4ncmp(src+1, "alse") == 0; }
+  return is_valid_false_atom(src);
+}
+
+// True when src starts with "null" and src[4] is accepted by
+// jsoncharutils::is_not_structural_or_whitespace; five bytes are read from src.
+simdjson_warn_unused
+simdjson_inline bool is_valid_null_atom(const uint8_t *src) {
+  const uint32_t mismatch = str4ncmp(src, "null");
+  const uint32_t trailing = jsoncharutils::is_not_structural_or_whitespace(src[4]);
+  return (mismatch | trailing) == 0;
+}
+
+// Length-aware overload for "null": same shape as the "true" overload.
+simdjson_warn_unused
+simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) {
+  if (len < 4) { return false; }
+  if (len == 4) { return str4ncmp(src, "null") == 0; }
+  return is_valid_null_atom(src);
+}
+
+} // namespace atomparsing
+} // unnamed namespace
+} // namespace lasx
+} // namespace simdjson
+
+#endif // SIMDJSON_GENERIC_ATOMPARSING_H
+/* end file simdjson/generic/atomparsing.h for lasx */
+/* including simdjson/generic/dom_parser_implementation.h for lasx: #include "simdjson/generic/dom_parser_implementation.h" */
+/* begin file
simdjson/generic/dom_parser_implementation.h for lasx */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { + +// expectation: sizeof(open_container) = 64/8. +struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept 
final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace lasx +} // namespace simdjson + +namespace simdjson { +namespace lasx { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace lasx +} // 
namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for lasx */ +/* including simdjson/generic/implementation_simdjson_result_base.h for lasx: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for lasx */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. 
+ */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. 
This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for lasx */ +/* including simdjson/generic/numberparsing.h for lasx: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for lasx */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace lasx { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), 
(SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." 
+#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. 
Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. 
+ const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. 
Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without + // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product + // is sufficiently accurate, and more computation is not needed. + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? 
-0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. 
+ // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. 
+ + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! 
+ // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! 
+ if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? 
-exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { + if (parse_float_fallback(src, answer)) { + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! 
+ if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer + // reference to it, it would force it to be stored in memory, preventing the compiler from + // picking it apart and putting into registers. i.e. if we pass it as reference, + // it gets slow. + double d; + error_code error = slow_float_parsing(src, &d); + writer.append_double(d); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. 
+ return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; 
} +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' 
== *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. 
+ // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it does not fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. 
+ if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. 
+ // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. 
+ if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. 
+ if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+  // Performance note: This check is only needed when digit_count == longest_digit_count but it is
+  // so cheap that we might as well always make it.
+  if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; }
+  return negative ? (~i+1) : i;
+}
+
+// Parse a JSON floating-point number starting at src. This overload takes no end pointer:
+// it keeps reading until it meets a character that cannot continue the number.
+// Returns the parsed double, INCORRECT_TYPE when there are no integer digits, or NUMBER_ERROR
+// for an invalid leading zero, a missing fraction/exponent digit, an exponent of 20+ digits,
+// a trailing character that is neither structural nor whitespace, or a failed fallback parse.
+simdjson_unused simdjson_inline simdjson_result<double> parse_double(const uint8_t * src) noexcept {
+  //
+  // Check for minus sign
+  //
+  bool negative = (*src == '-');
+  src += uint8_t(negative);
+
+  //
+  // Parse the integer part.
+  //
+  uint64_t i = 0;
+  const uint8_t *p = src;
+  p += parse_digit(*p, i);
+  bool leading_zero = (i == 0);
+  while (parse_digit(*p, i)) { p++; }
+  // no integer digits, or 0123 (zero must be solo)
+  if ( p == src ) { return INCORRECT_TYPE; }
+  if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; }
+
+  //
+  // Parse the decimal part.
+  //
+  int64_t exponent = 0;
+  bool overflow;
+  if (simdjson_likely(*p == '.')) {
+    p++;
+    const uint8_t *start_decimal_digits = p;
+    if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits
+    p++;
+    while (parse_digit(*p, i)) { p++; }
+    // Every fractional digit was folded into i, so the decimal exponent goes negative by that many.
+    exponent = -(p - start_decimal_digits);
+
+    // Overflow check. More than 19 digits (minus the decimal) may be overflow.
+    overflow = p-src-1 > 19;
+    if (simdjson_unlikely(overflow && leading_zero)) {
+      // Skip leading 0.00000 and see if it still overflows
+      const uint8_t *start_digits = src + 2;
+      while (*start_digits == '0') { start_digits++; }
+      overflow = p-start_digits > 19;
+    }
+  } else {
+    overflow = p-src > 19;
+  }
+
+  //
+  // Parse the exponent
+  //
+  if (*p == 'e' || *p == 'E') {
+    p++;
+    bool exp_neg = *p == '-';
+    p += exp_neg || *p == '+';
+
+    uint64_t exp = 0;
+    const uint8_t *start_exp_digits = p;
+    while (parse_digit(*p, exp)) { p++; }
+    // no exp digits, or 20+ exp digits
+    if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; }
+
+    exponent += exp_neg ? 0-exp : exp;
+  }
+
+  if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; }
+
+  overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power;
+
+  //
+  // Assemble (or slow-parse) the float
+  //
+  double d;
+  if (simdjson_likely(!overflow)) {
+    if (compute_float_64(exponent, i, negative, d)) { return d; }
+  }
+  // The fallback re-parses from the original start, so step back over the sign if there was one.
+  if (!parse_float_fallback(src - uint8_t(negative), &d)) {
+    return NUMBER_ERROR;
+  }
+  return d;
+}
+
+// Returns true when the first character at src is a minus sign.
+simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept {
+  return (*src == '-');
+}
+
+// Returns true when the digits at src (after an optional '-') are followed by a structural or
+// whitespace character, false when something else follows, and NUMBER_ERROR when there are no digits.
+simdjson_unused simdjson_inline simdjson_result<bool> is_integer(const uint8_t * src) noexcept {
+  bool negative = (*src == '-');
+  src += uint8_t(negative);
+  const uint8_t *p = src;
+  // The unsigned cast turns "is *p in '0'..'9'" into a single comparison.
+  while(static_cast<unsigned char>(*p - '0') <= 9) { p++; }
+  if ( p == src ) { return NUMBER_ERROR; }
+  if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; }
+  return false;
+}
+
+// Classifies the number at src as signed_integer, unsigned_integer, big_integer or
+// floating_point_number; returns NUMBER_ERROR when there are no digits.
+simdjson_unused simdjson_inline simdjson_result<number_type> get_number_type(const uint8_t * src) noexcept {
+  bool negative = (*src == '-');
+  src += uint8_t(negative);
+  const uint8_t *p = src;
+  while(static_cast<unsigned char>(*p - '0') <= 9) { p++; }
+  size_t digit_count = size_t(p - src);
+  if ( p == src ) { return NUMBER_ERROR; }
+  if (jsoncharutils::is_structural_or_whitespace(*p)) {
+    static const uint8_t * smaller_big_integer = reinterpret_cast<const uint8_t *>("9223372036854775808");
+    // We have an integer.
+    if(simdjson_unlikely(digit_count > 20)) {
+      return number_type::big_integer;
+    }
+    // If the number is negative and valid, it must be a signed integer.
+    if(negative) {
+      if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer;
+      if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) {
+        return number_type::big_integer;
+      }
+      return number_type::signed_integer;
+    }
+    // Let us check if we have a big integer (>=2**64).
+ static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. 
+ // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for lasx */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for lasx: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // 
SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} 
+template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for lasx */ +/* end file simdjson/generic/amalgamated.h for lasx */ +/* including simdjson/lasx/end.h: #include "simdjson/lasx/end.h" */ +/* begin file simdjson/lasx/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "lasx" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/lasx/end.h */ + +#endif // SIMDJSON_LASX_H +/* end file simdjson/lasx.h */ +/* including simdjson/lasx/implementation.h: #include */ +/* begin file simdjson/lasx/implementation.h */ +#ifndef SIMDJSON_LASX_IMPLEMENTATION_H +#define SIMDJSON_LASX_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : 
simdjson::implementation("lasx", "LoongArch ASX", internal::instruction_set::LASX) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_IMPLEMENTATION_H +/* end file simdjson/lasx/implementation.h */ + +/* including simdjson/lasx/begin.h: #include */ +/* begin file simdjson/lasx/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "lasx" */ +#define SIMDJSON_IMPLEMENTATION lasx +/* including simdjson/lasx/base.h: #include "simdjson/lasx/base.h" */ +/* begin file simdjson/lasx/base.h */ +#ifndef SIMDJSON_LASX_BASE_H +#define SIMDJSON_LASX_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Implementation for LASX. 
+ */ +namespace lasx { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_BASE_H +/* end file simdjson/lasx/base.h */ +/* including simdjson/lasx/intrinsics.h: #include "simdjson/lasx/intrinsics.h" */ +/* begin file simdjson/lasx/intrinsics.h */ +#ifndef SIMDJSON_LASX_INTRINSICS_H +#define SIMDJSON_LASX_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for LoongArch ASX"); + +#endif // SIMDJSON_LASX_INTRINSICS_H +/* end file simdjson/lasx/intrinsics.h */ +/* including simdjson/lasx/bitmanipulation.h: #include "simdjson/lasx/bitmanipulation.h" */ +/* begin file simdjson/lasx/bitmanipulation.h */ +#ifndef SIMDJSON_LASX_BITMANIPULATION_H +#define SIMDJSON_LASX_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/bitmask.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. 
+SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { + return __builtin_ctzll(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return __builtin_clzll(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int count_ones(uint64_t input_num) { + return __lasx_xvpickve2gr_w(__lasx_xvpcnt_d(__m256i(v4u64{input_num, 0, 0, 0})), 0); +} + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +} + +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_BITMANIPULATION_H +/* end file simdjson/lasx/bitmanipulation.h */ +/* including simdjson/lasx/bitmask.h: #include "simdjson/lasx/bitmask.h" */ +/* begin file simdjson/lasx/bitmask.h */ +#ifndef SIMDJSON_LASX_BITMASK_H +#define SIMDJSON_LASX_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. 
+// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif +/* end file simdjson/lasx/bitmask.h */ +/* including simdjson/lasx/numberparsing_defs.h: #include "simdjson/lasx/numberparsing_defs.h" */ +/* begin file simdjson/lasx/numberparsing_defs.h */ +#ifndef SIMDJSON_LASX_NUMBERPARSING_DEFS_H +#define SIMDJSON_LASX_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace lasx { +namespace numberparsing { + +// we don't have appropriate instructions, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); + return answer; +} + +} // namespace numberparsing +} // namespace lasx +} // namespace simdjson + 
+#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_LASX_NUMBERPARSING_DEFS_H +/* end file simdjson/lasx/numberparsing_defs.h */ +/* including simdjson/lasx/simd.h: #include "simdjson/lasx/simd.h" */ +/* begin file simdjson/lasx/simd.h */ +#ifndef SIMDJSON_LASX_SIMD_H +#define SIMDJSON_LASX_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m256i value; + + // Zero constructor + simdjson_inline base() : value{__m256i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m256i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m256i&() const { return this->value; } + simdjson_inline operator __m256i&() { return this->value; } + simdjson_inline operator const v32i8&() const { return (v32i8&)this->value; } + simdjson_inline operator v32i8&() { return (v32i8&)this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return __lasx_xvor_v(*this, other); } + simdjson_inline Child operator&(const Child other) const { return __lasx_xvand_v(*this, other); } + simdjson_inline Child operator^(const Child other) const { return __lasx_xvxor_v(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return __lasx_xvandn_v(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& 
operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. + template + struct simd8; + + template> + struct base8: base> { + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m256i _value) : base>(_value) {} + + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return __lasx_xvseq_b(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + __m256i hi = __lasx_xvbsll_v(*this, N); + __m256i lo = __lasx_xvbsrl_v(*this, 16 - N); + __m256i tmp = __lasx_xvbsrl_v(prev_chunk, 16 - N); + lo = __lasx_xvpermi_q(lo, tmp, 0x21); + return __lasx_xvor_v(hi, lo); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return __lasx_xvreplgr2vr_b(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m256i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { + __m256i mask = __lasx_xvmskltz_b(*this); + return (__lasx_xvpickve2gr_w(mask, 4) << 16) | (__lasx_xvpickve2gr_w(mask, 0)); + } + simdjson_inline bool any() const { + __m256i v = __lasx_xvmsknz_b(*this); + return (0 == __lasx_xvpickve2gr_w(v, 0)) && (0 == __lasx_xvpickve2gr_w(v, 4)); + } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { + return __lasx_xvreplgr2vr_b(_value); + } + static simdjson_inline simd8 zero() { return __lasx_xvldi(0); } + static 
simdjson_inline simd8 load(const T values[32]) { + return __lasx_xvld(reinterpret_cast(values), 0); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m256i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[32]) const { + return __lasx_xvst(*this, reinterpret_cast<__m256i *>(dst), 0); + } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return __lasx_xvadd_b(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return __lasx_xvsub_b(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return __lasx_xvshuf_b(lookup_table, lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. 
+ template + simdjson_inline void compress(uint32_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by haswell + // lasx do it in 4 steps, first 8 bytes and then second 8 bytes... + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second significant 8 bits + uint8_t mask3 = uint8_t(mask >> 16); // ... + uint8_t mask4 = uint8_t(mask >> 24); // ... + // next line just loads the 64-bit values thintable_epi8[mask{1,2,3,4}] + // into a 256-bit register. + __m256i shufmask = {int64_t(thintable_epi8[mask1]), int64_t(thintable_epi8[mask2]) + 0x0808080808080808, int64_t(thintable_epi8[mask3]), int64_t(thintable_epi8[mask4]) + 0x0808080808080808}; + // this is the version "nearly pruned" + __m256i pruned = __lasx_xvshuf_b(*this, *this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + int pop2 = BitsSetTable256mul2[mask2]; + int pop3 = BitsSetTable256mul2[mask3]; + + // then load the corresponding mask + __m256i masklo = __lasx_xvldx(reinterpret_cast(reinterpret_cast(pshufb_combine_table)), pop1 * 8); + __m256i maskhi = __lasx_xvldx(reinterpret_cast(reinterpret_cast(pshufb_combine_table)), pop3 * 8); + __m256i compactmask = __lasx_xvpermi_q(maskhi, masklo, 0x20); + __m256i answer = __lasx_xvshuf_b(pruned, pruned, compactmask); + __lasx_xvst(answer, reinterpret_cast(output), 0); + uint64_t value3 = __lasx_xvpickve2gr_du(answer, 2); + uint64_t value4 = __lasx_xvpickve2gr_du(answer, 3); + uint64_t *pos = reinterpret_cast(reinterpret_cast(output) + 16 - (pop1 + pop2) / 2); + pos[0] = value3; + pos[1] = value4; + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L 
replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 + ) : simd8({ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + }) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return __lasx_xvmax_b(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lasx_xvmin_b(*this, other); } + 
simdjson_inline simd8 operator>(const simd8 other) const { return __lasx_xvslt_b(other, *this); } + simdjson_inline simd8 operator<(const simd8 other) const { return __lasx_xvslt_b(*this, other); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 + ) : simd8(__m256i(v32u8{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + })) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return __lasx_xvsadd_bu(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return __lasx_xvssub_bu(*this, other); } + + // Order-specific operations + 
simdjson_inline simd8 max_val(const simd8 other) const { return __lasx_xvmax_bu(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lasx_xvmin_bu(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { + __m256i mask = __lasx_xvmskltz_b(*this); + return (0 == __lasx_xvpickve2gr_w(mask, 0)) && (0 == __lasx_xvpickve2gr_w(mask, 4)); + } + simdjson_inline bool bits_not_set_anywhere() const { + __m256i v = __lasx_xvmsknz_b(*this); + return (0 == __lasx_xvpickve2gr_w(v, 0)) && (0 == __lasx_xvpickve2gr_w(v, 4)); + } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { + __m256i v = __lasx_xvmsknz_b(__lasx_xvand_v(*this, bits)); + return (0 == __lasx_xvpickve2gr_w(v, 0)) && (0 == __lasx_xvpickve2gr_w(v, 4)); + } + 
simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(__lasx_xvsrli_b(*this, N)); } + template + simdjson_inline simd8 shl() const { return simd8(__lasx_xvslli_b(*this, N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 2, "LASX kernel should use two registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint32_t mask1 = uint32_t(mask); + uint32_t mask2 = uint32_t(mask >> 32); + __m256i zcnt = __lasx_xvpcnt_w(__m256i(v4u64{~mask, 0, 0, 0})); + uint64_t zcnt1 = __lasx_xvpickve2gr_wu(zcnt, 0); + uint64_t zcnt2 = __lasx_xvpickve2gr_wu(zcnt, 1); + // There should be a critical value at which processing in scalar code is faster.
+ if (zcnt1) + this->chunks[0].compress(mask1, output); + if (zcnt2) + this->chunks[1].compress(mask2, output + zcnt1); + return zcnt1 + zcnt2; + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + } + + simdjson_inline uint64_t to_bitmask() const { + __m256i mask0 = __lasx_xvmskltz_b(this->chunks[0]); + __m256i mask1 = __lasx_xvmskltz_b(this->chunks[1]); + __m256i mask_tmp = __lasx_xvpickve_w(mask0, 4); + __m256i tmp = __lasx_xvpickve_w(mask1, 4); + mask0 = __lasx_xvinsve0_w(mask0, mask1, 1); + mask_tmp = __lasx_xvinsve0_w(mask_tmp, tmp, 1); + return __lasx_xvpickve2gr_du(__lasx_xvpackev_h(mask_tmp, mask0), 0); + } + + simdjson_inline simd8 reduce_or() const { + return this->chunks[0] | this->chunks[1]; + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_SIMD_H +/* end file simdjson/lasx/simd.h */ +/* including simdjson/lasx/stringparsing_defs.h: #include "simdjson/lasx/stringparsing_defs.h" */ +/* begin file simdjson/lasx/stringparsing_defs.h */ +#ifndef SIMDJSON_LASX_STRINGPARSING_DEFS_H +#define SIMDJSON_LASX_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/lasx/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + return { + static_cast((v == '\\').to_bitmask()), // bs_bits + static_cast((v == '"').to_bitmask()), // quote_bits + }; +} + +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_STRINGPARSING_DEFS_H +/* end file simdjson/lasx/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/lasx/begin.h */ +/* including generic/amalgamated.h for lasx: #include */ +/* begin file generic/amalgamated.h for lasx */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_SRC_GENERIC_DEPENDENCIES_H) +#error generic/dependencies.h must be included before generic/amalgamated.h! 
+#endif + +/* including generic/base.h for lasx: #include */ +/* begin file generic/base.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { + +struct json_character_block; + +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_BASE_H +/* end file generic/base.h for lasx */ +/* including generic/dom_parser_implementation.h for lasx: #include */ +/* begin file generic/dom_parser_implementation.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// Interface a dom parser implementation must fulfill +namespace simdjson { +namespace lasx { +namespace { + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3); +simdjson_inline bool is_ascii(const simd8x64& input); + +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file generic/dom_parser_implementation.h for lasx */ +/* including generic/json_character_block.h for lasx: #include */ +/* begin file generic/json_character_block.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ 
+/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { + +struct json_character_block { + static simdjson_inline json_character_block classify(const simd::simd8x64& in); + + simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; } + simdjson_inline uint64_t op() const noexcept { return _op; } + simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } + + uint64_t _whitespace; + uint64_t _op; +}; + +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H +/* end file generic/json_character_block.h for lasx */ +/* end file generic/amalgamated.h for lasx */ +/* including generic/stage1/amalgamated.h for lasx: #include */ +/* begin file generic/stage1/amalgamated.h for lasx */ +// Stuff other things depend on +/* including generic/stage1/base.h for lasx: #include */ +/* begin file generic/stage1/base.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { +namespace stage1 { + +class bit_indexer; +template +struct buf_block_reader; +struct json_block; +class json_minifier; +class json_scanner; +struct json_string_block; +class json_string_scanner; +class json_structural_indexer; + +} // namespace stage1 + +namespace utf8_validation { +struct utf8_checker; +} // namespace utf8_validation + +using utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace lasx +} // 
namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BASE_H +/* end file generic/stage1/base.h for lasx */ +/* including generic/stage1/buf_block_reader.h for lasx: #include */ +/* begin file generic/stage1/buf_block_reader.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace lasx { +namespace { +namespace stage1 { + +// Walks through a buffer in block-sized increments, loading the last part with spaces +template +struct buf_block_reader { +public: + simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless no bytes remain (len == idx, + * e.g. len == 0), in which case this function returns 0 without writing to dst. In particular, if len == STEP_SIZE there + * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. + */ + simdjson_inline size_t get_remainder(uint8_t *dst) const; + simdjson_inline void advance(); +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text_64(const uint8_t *text) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i); i++) { + buf[i] = int8_t(text[i]) < ' ' ?
'_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text(const simd8x64& in) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] < ' ') { buf[i] = '_'; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_input_text(const simd8x64& in, uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] <= ' ') { buf[i] = '_'; } + if (!(mask & (size_t(1) << i))) { buf[i] = ' '; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_mask(uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i<64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} + +template +simdjson_inline size_t buf_block_reader::block_index() { return idx; } + +template +simdjson_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} + +template +simdjson_inline const uint8_t *buf_block_reader::full_block() const { + return &buf[idx]; +} + +template +simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { + if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. 
+ std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} + +template +simdjson_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H +/* end file generic/stage1/buf_block_reader.h for lasx */ +/* including generic/stage1/json_escape_scanner.h for lasx: #include */ +/* begin file generic/stage1/json_escape_scanner.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { +namespace stage1 { + +/** + * Scans for escape characters in JSON, taking care with multiple backslashes (\\n vs. \n). + */ +struct json_escape_scanner { + /** Carry between blocks: 1 if the first character of the next block is escaped (bit 63 of the previous escape mask), else 0. */ + uint64_t next_is_escaped = 0ULL; + + struct escaped_and_escape { + /** + * Mask of escaped characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 0100100010100101000 + * n \ \ n \ \ + * ``` + */ + uint64_t escaped; + /** + * Mask of escape characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 1001000101001010001 + * \ \ \ \ \ \ \ + * ``` + */ + uint64_t escape; + }; + + /** + * Get a mask of both escape and escaped characters (the characters following a backslash). + * + * @param backslash A mask of the characters that can escape others (but could be + * escaped themselves). e.g.
block.eq('\\') + */ + simdjson_really_inline escaped_and_escape next(uint64_t backslash) noexcept { + +#if !SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT + if (!backslash) { return {next_escaped_without_backslashes(), 0}; } +#endif + + // | | Mask (shows characters instead of 1's) | Depth | Instructions | + // |--------------------------------|----------------------------------------|-------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | | | + // | | ` even odd even odd odd` | | | + // | potential_escape | ` \ \\\ \\\ \\\\ \\\\ \\\` | 1 | 1 (backslash & ~first_is_escaped) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 5 | 5 (next_escape_and_terminal_code()) + // | escaped | `\ \ n \ n \ \ \ \ \ ` X | 6 | 7 (escape_and_terminal_code ^ (potential_escape | first_is_escaped)) + // | escape | ` \ \ \ \ \ \ \ \ \ \` | 6 | 8 (escape_and_terminal_code & backslash) + // | first_is_escaped | `\ ` | 7 (*) | 9 (escape >> 63) () + // (*) this is not needed until the next iteration + uint64_t escape_and_terminal_code = next_escape_and_terminal_code(backslash & ~this->next_is_escaped); + uint64_t escaped = escape_and_terminal_code ^ (backslash | this->next_is_escaped); + uint64_t escape = escape_and_terminal_code & backslash; + this->next_is_escaped = escape >> 63; + return {escaped, escape}; + } + +private: + static constexpr const uint64_t ODD_BITS = 0xAAAAAAAAAAAAAAAAULL; + + simdjson_really_inline uint64_t next_escaped_without_backslashes() noexcept { + uint64_t escaped = this->next_is_escaped; + this->next_is_escaped = 0; + return escaped; + } + + /** + * Returns a mask of the next escape characters (masking out escaped backslashes), along with + * any non-backslash escape codes. + * + * \n \\n \\\n \\\\n returns: + * \n \ \ \n \ \ + * 11 100 1011 10100 + * + * You are expected to mask out the first bit yourself if the previous block had a trailing + * escape. 
+ * + * & the result with potential_escape to get just the escape characters. + * ^ the result with (potential_escape | first_is_escaped) to get escaped characters. + */ + static simdjson_really_inline uint64_t next_escape_and_terminal_code(uint64_t potential_escape) noexcept { + // If we were to just shift and mask out any odd bits, we'd actually get a *half* right answer: + // any even-aligned backslash runs would be correct! Odd-aligned backslash runs would be + // inverted (\\\ would be 010 instead of 101). + // + // ``` + // string: | ____\\\\_\\\\_____ | + // maybe_escaped | ODD | \ \ \ \ | + // even-aligned ^^^ ^^^^ odd-aligned + // ``` + // + // Taking that into account, our basic strategy is: + // + // 1. Use subtraction to produce a mask with 1's for even-aligned runs and 0's for + // odd-aligned runs. + // 2. XOR all odd bits, which masks out the odd bits in even-aligned runs, and brings IN the + // odd bits in odd-aligned runs. + // 3. & with backslash to clean up any stray bits. + // runs are set to 0, and then XORing with "odd": + // + // | | Mask (shows characters instead of 1's) | Instructions | + // |--------------------------------|----------------------------------------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | + // | | ` even odd even odd odd` | + // | maybe_escaped | ` n \\n \\n \\\_ \\\_ \\` X | 1 (potential_escape << 1) + // | maybe_escaped_and_odd | ` \n_ \\n _ \\\n_ _ \\\__ _\\\_ \\\` | 1 (maybe_escaped | odd) + // | even_series_codes_and_odd | ` n_\\\ _ n_ _\\\\ _ _ ` | 1 (maybe_escaped_and_odd - potential_escape) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 1 (^ odd) + // + + // Escaped characters are characters following an escape. + uint64_t maybe_escaped = potential_escape << 1; + + // To distinguish odd from even escape sequences, therefore, we turn on any *starting* + // escapes that are on an odd byte. (We actually bring in all odd bits, for speed.) 
+ // - Odd runs of backslashes are 0000, and the code at the end ("n" in \n or \\n) is 1. + // - Even runs of backslashes are 1111, and the code at the end ("n" in \n or \\n) is 0. + // - All other odd bytes are 1, and even bytes are 0. + uint64_t maybe_escaped_and_odd_bits = maybe_escaped | ODD_BITS; + uint64_t even_series_codes_and_odd_bits = maybe_escaped_and_odd_bits - potential_escape; + + // Now we flip all odd bytes back with xor. This: + // - Makes odd runs of backslashes go from 0000 to 1010 + // - Makes even runs of backslashes go from 1111 to 1010 + // - Sets actually-escaped codes to 1 (the n in \n and \\n: \n = 11, \\n = 100) + // - Resets all other bytes to 0 + return even_series_codes_and_odd_bits ^ ODD_BITS; + } +}; + +} // namespace stage1 +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H +/* end file generic/stage1/json_escape_scanner.h for lasx */ +/* including generic/stage1/json_string_scanner.h for lasx: #include */ +/* begin file generic/stage1/json_string_scanner.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { +namespace stage1 { + +struct json_string_block { + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_really_inline json_string_block(uint64_t escaped, uint64_t quote, uint64_t in_string) : + _escaped(escaped), _quote(quote), _in_string(in_string) {} + + // Escaped characters (characters following an escape() character) + simdjson_really_inline uint64_t
escaped() const { return _escaped; } + // Real (non-backslashed) quotes + simdjson_really_inline uint64_t quote() const { return _quote; } + // Only characters inside the string (not including the quotes) + simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + // Return a mask of whether the given characters are outside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + // Tail of string (everything except the start quote) + simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } + + // escaped characters (backslashed--does not include the hex characters after \u) + uint64_t _escaped; + // real quotes (non-escaped ones) + uint64_t _quote; + // string characters (includes start quote but not end quote) + uint64_t _in_string; +}; + +// Scans blocks for string characters, storing the state necessary to do so +class json_string_scanner { +public: + simdjson_really_inline json_string_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_really_inline error_code finish(); + +private: + // Scans for escape characters + json_escape_scanner escape_scanner{}; + // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). + uint64_t prev_in_string = 0ULL; +}; + +// +// Return a mask of all string characters plus end quotes. +// +// prev_escaped is overflow saying whether the next character is escaped. +// prev_in_string is overflow saying whether we're still in a string. +// +// Backslash sequences outside of quotes will be detected in stage 2.
+// +simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + const uint64_t backslash = in.eq('\\'); + const uint64_t escaped = escape_scanner.next(backslash).escaped; + const uint64_t quote = in.eq('"') & ~escaped; + + // + // prefix_xor flips on bits inside the string (and flips off the end quote). + // + // Then we xor with prev_in_string: if we were in a string already, its effect is flipped + // (characters inside strings are outside, and characters outside strings are inside). + // + const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; + + // + // Check if we're still in a string at the end of the box so the next block will know + // + prev_in_string = uint64_t(static_cast(in_string) >> 63); + + // Use ^ to turn the beginning quote off, and the end quote on. + + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_string_block(escaped, quote, in_string); +} + +simdjson_really_inline error_code json_string_scanner::finish() { + if (prev_in_string) { + return UNCLOSED_STRING; + } + return SUCCESS; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_string_scanner.h for lasx */ +/* including generic/stage1/utf8_lookup4_algorithm.h for lasx: #include */ +/* begin file generic/stage1/utf8_lookup4_algorithm.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { 
+namespace utf8_validation { + +using namespace simd; + + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { +// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) +// Bit 1 = Too Long (ASCII followed by continuation) +// Bit 2 = Overlong 3-byte +// Bit 4 = Surrogate +// Bit 5 = Overlong 2-byte +// Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1<<6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 + ); + constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . 
+ const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, + CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 + ); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT + ); + return (byte_1_high & byte_1_low & byte_2_high); + } + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, + const simd8 prev_input, const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = must_be_2_3_continuation(prev2, prev3); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; + } + + // + // Return nonzero if there are incomplete multibyte characters at the end of the block: + // e.g. if there is a 4-byte character, but it's 3 bytes from the end. 
+ // + simdjson_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): + // ... 1111____ 111_____ 11______ +#if SIMDJSON_IMPLEMENTATION_ICELAKE + static const uint8_t max_array[64] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#else + static const uint8_t max_array[32] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#endif + const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); + return input.gt_bits(max_value); + } + + struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. + // + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes + // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + // The only problem that can happen at EOF is that a multibyte character is too short + // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // too large in the first of two bytes. 
+ simdjson_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; + } + + simdjson_inline void check_next_input(const simd8x64& input) { + if(simdjson_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 1) + ||(simd8x64::NUM_CHUNKS == 2) + || (simd8x64::NUM_CHUNKS == 4), + "We support one, two or four chunks per 64-byte block."); + SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; + } + } + // do not forget to call check_eof! + simdjson_inline error_code errors() { + return this->error.any_bits_set_anywhere() ? 
error_code::UTF8_ERROR : error_code::SUCCESS; + } + + }; // struct utf8_checker +} // namespace utf8_validation + +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H +/* end file generic/stage1/utf8_lookup4_algorithm.h for lasx */ +/* including generic/stage1/json_scanner.h for lasx: #include */ +/* begin file generic/stage1/json_scanner.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { +namespace stage1 { + +/** + * A block of scanned json, with information on operators and scalars. + * + * We seek to identify pseudo-structural characters. Anything that is inside + * a string must be omitted (hence & ~_string.string_tail()). + * Otherwise, pseudo-structural characters come in two forms. + * 1. We have the structural characters ([,],{,},:, comma). The + * term 'structural character' is from the JSON RFC. + * 2. We have the 'scalar pseudo-structural characters'. + * Scalars are quotes, and any character except structural characters and white space. + * + * To identify the scalar pseudo-structural characters, we must look at what comes + * before them: it must be a space, a quote or a structural characters. + * Starting with simdjson v0.3, we identify them by + * negation: we identify everything that is followed by a non-quote scalar, + * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. 
+ */ +struct json_block { +public: + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + + /** + * The start of structurals. + * In simdjson prior to v0.3, these were called the pseudo-structural characters. + **/ + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + /** All JSON whitespace (i.e. not in a string) */ + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + + // Helpers + + /** Whether the given characters are inside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + /** Whether the given characters are outside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + + // string and escape characters + json_string_block _string; + // whitespace, structural characters ('operators'), scalars + json_character_block _characters; + // whether the previous character was a scalar + uint64_t _follows_potential_nonquote_scalar; +private: + // Potential structurals (i.e. disregarding strings) + + /** + * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". + * They may reside inside a string. 
+ **/ + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + /** + * The start of non-operator runs, like 123, true and "abc". + * It main reside inside a string. + **/ + simdjson_inline uint64_t potential_scalar_start() const noexcept { + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space + // then we know that it is irrelevant structurally. + return _characters.scalar() & ~follows_potential_scalar(); + } + /** + * Whether the given character is immediately after a non-operator like 123, true. + * The characters following a quote are not included. + */ + simdjson_inline uint64_t follows_potential_scalar() const noexcept { + // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character + // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a + // white space. + // It is understood that within quoted region, anything at all could be marked (irrelevant). + return _follows_potential_nonquote_scalar; + } +}; + +/** + * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. + * + * The scanner starts by calculating two distinct things: + * - string characters (taking \" into account) + * - structural characters or 'operators' ([]{},:, comma) + * and scalars (runs of non-operators like 123, true and "abc") + * + * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: + * in particular, the operator/scalar bit will find plenty of things that are actually part of + * strings. When we're done, json_block will fuse the two together by masking out tokens that are + * part of a string. 
+ */ +class json_scanner { +public: + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_inline error_code finish(); + +private: + // Whether the last character of the previous iteration is part of a scalar token + // (anything except whitespace or a structural character/'operator'). + uint64_t prev_scalar = 0ULL; + json_string_scanner string_scanner{}; +}; + + +// +// Check if the current character immediately follows a matching character. +// +// For example, this checks for quotes with backslashes in front of them: +// +// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); +// +simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { + const uint64_t result = match << 1 | overflow; + overflow = match >> 63; + return result; +} + +simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { + json_string_block strings = string_scanner.next(in); + // identifies the white-space and the structural characters + json_character_block characters = json_character_block::classify(in); + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). + // + // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) + // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential + // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we + // may need to add an extra check when parsing strings. + // + // Performance: there are many ways to skin this cat. 
+ const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); + uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_block( + strings,// strings is a function-local object so either it moves or the copy is elided. + characters, + follows_nonquote_scalar + ); +} + +simdjson_inline error_code json_scanner::finish() { + return string_scanner.finish(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H +/* end file generic/stage1/json_scanner.h for lasx */ + +// All other declarations +/* including generic/stage1/find_next_document_index.h for lasx: #include */ +/* begin file generic/stage1/find_next_document_index.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { +namespace stage1 { + +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. 
We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! + if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document. 
+ switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } + return 0; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H +/* end file generic/stage1/find_next_document_index.h for lasx */ +/* including generic/stage1/json_minifier.h for lasx: #include */ +/* begin file generic/stage1/json_minifier.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace lasx { +namespace { +namespace stage1 { + +class json_minifier { +public: + template + static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; + +private: + simdjson_inline json_minifier(uint8_t *_dst) + : dst{_dst} + {} + template + simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + json_scanner scanner{}; + uint8_t 
*dst; +}; + +simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + uint64_t mask = block.whitespace(); + dst += in.compress(mask, dst); +} + +simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { + error_code error = scanner.finish(); + if (error) { dst_len = 0; return error; } + dst_len = dst - dst_start; + return SUCCESS; +} + +template<> +simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + simd::simd8x64 in_2(block_buf+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1); + this->next(in_2, block_2); + reader.advance(); +} + +template<> +simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + json_block block_1 = scanner.next(in_1); + this->next(block_buf, block_1); + reader.advance(); +} + +template +error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { + buf_block_reader reader(buf, len); + json_minifier minifier(dst); + + // Index the first n-1 blocks + while (reader.has_full_block()) { + minifier.step(reader.full_block(), reader); + } + + // Index the last (remainder) block, padded with spaces + uint8_t block[STEP_SIZE]; + size_t remaining_bytes = reader.get_remainder(block); + if (remaining_bytes > 0) { + // We do not want to write directly to the output stream. Rather, we write + // to a local buffer (for safety). + uint8_t out_block[STEP_SIZE]; + uint8_t * const guarded_dst{minifier.dst}; + minifier.dst = out_block; + minifier.step(block, reader); + size_t to_write = minifier.dst - out_block; + // In some cases, we could be enticed to consider the padded spaces + // as part of the string. This is fine as long as we do not write more + // than we consumed. 
+ if(to_write > remaining_bytes) { to_write = remaining_bytes; } + memcpy(guarded_dst, out_block, to_write); + minifier.dst = guarded_dst + to_write; + } + return minifier.finish(dst, dst_len); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H +/* end file generic/stage1/json_minifier.h for lasx */ +/* including generic/stage1/json_structural_indexer.h for lasx: #include */ +/* begin file generic/stage1/json_structural_indexer.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace lasx { +namespace { +namespace stage1 { + +class bit_indexer { +public: + uint32_t *tail; + + simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + +#if SIMDJSON_PREFER_REVERSE_BITS + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction. 
+ * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. However, + * it tends to minimize the total number of instructions which is + * beneficial. + */ + simdjson_inline void write_index(uint32_t idx, uint64_t& rev_bits, int i) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } +#else + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. + */ + + simdjson_inline void write_index(uint32_t idx, uint64_t& bits, int i) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } +#endif // SIMDJSON_PREFER_REVERSE_BITS + + template + simdjson_inline int write_indexes(uint32_t idx, uint64_t& bits) { + write_index(idx, bits, START); + SIMDJSON_IF_CONSTEXPR (N > 1) { + write_indexes<(N-1>0?START+1:START), (N-1>=0?N-1:1)>(idx, bits); + } + return START+N; + } + + template + simdjson_inline int write_indexes_stepped(uint32_t idx, uint64_t& bits, int cnt) { + write_indexes(idx, bits); + SIMDJSON_IF_CONSTEXPR ((START+STEP) < END) { + if (simdjson_unlikely((START+STEP) < cnt)) { + write_indexes_stepped<(START+STEP(idx, bits, cnt); + } + } + return ((END-START) % STEP) == 0 ? 
END : (END-START) - ((END-START) % STEP) + STEP; + } + + // flatten out values in 'bits' assuming that they are are to have values of idx + // plus their position in the bitvector, and store these indexes at + // base_ptr[base] incrementing base as we go + // will potentially store extra values beyond end of valid bits, so base_ptr + // needs to be large enough to handle this + // + // If the kernel sets SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER, then it + // will provide its own version of the code. +#ifdef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + simdjson_inline void write(uint32_t idx, uint64_t bits); +#else + simdjson_inline void write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; + + int cnt = static_cast(count_ones(bits)); + +#if SIMDJSON_PREFER_REVERSE_BITS + bits = reverse_bits(bits); +#endif +#ifdef SIMDJSON_STRUCTURAL_INDEXER_STEP + static constexpr const int STEP = SIMDJSON_STRUCTURAL_INDEXER_STEP; +#else + static constexpr const int STEP = 4; +#endif + static constexpr const int STEP_UNTIL = 24; + + write_indexes_stepped<0, STEP_UNTIL, STEP>(idx, bits, cnt); + SIMDJSON_IF_CONSTEXPR (STEP_UNTIL < 64) { + if (simdjson_unlikely(STEP_UNTIL < cnt)) { + for (int i=STEP_UNTIL; itail += cnt; + } +#endif // SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +}; + +class json_structural_indexer { +public: + /** + * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. + * + * @param partial Setting the partial parameter to true allows the find_structural_bits to + * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If + * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. 
+ */ + template + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; + +private: + simdjson_inline json_structural_indexer(uint32_t *structural_indexes); + template + simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + + json_scanner scanner{}; + utf8_checker checker{}; + bit_indexer indexer; + uint64_t prev_structurals = 0; + uint64_t unescaped_chars_error = 0; +}; + +simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} + +// Skip the last character if it is partial +simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { + if (simdjson_unlikely(len < 3)) { + switch (len) { + case 2: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + return len; + case 1: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + return len; + case 0: + return len; + } + } + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left + return len; +} + +// +// PERF NOTES: +// We pipe 2 inputs through these stages: +// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load +// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. +// 2. Scan the JSON for critical data: strings, scalars and operators. 
This is the critical path. +// The output of step 1 depends entirely on this information. These functions don't quite use +// up enough CPU: the second half of the functions is highly serial, only using 1 execution core +// at a time. The second input's scans has some dependency on the first ones finishing it, but +// they can make a lot of progress before they need that information. +// 3. Step 1 does not use enough capacity, so we run some extra stuff while we're waiting for that +// to finish: utf-8 checks and generating the output from the last iteration. +// +// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all +// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough +// workout. +// +template +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { + if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. + if(len == 0) { return UTF8_ERROR; } + } + buf_block_reader reader(buf, len); + json_structural_indexer indexer(parser.structural_indexes.get()); + + // Read all but the last block + while (reader.has_full_block()) { + indexer.step(reader.full_block(), reader); + } + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) 
+ uint8_t block[STEP_SIZE]; + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } + indexer.step(block, reader); + return indexer.finish(parser, reader.block_index(), len, partial); +} + +template<> +simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block); + simd::simd8x64 in_2(block+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1, reader.block_index()); + this->next(in_2, block_2, reader.block_index()+64); + reader.advance(); +} + +template<> +simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block); + json_block block_1 = scanner.next(in_1); + this->next(in_1, block_1, reader.block_index()); + reader.advance(); +} + +simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + uint64_t unescaped = in.lteq(0x1F); +#if SIMDJSON_UTF8VALIDATION + checker.check_next_input(in); +#endif + indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser + prev_structurals = block.structural_start(); + unescaped_chars_error |= block.non_quote_inside_string(unescaped); +} + +simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { + // Write out the final iteration's structurals + indexer.write(uint32_t(idx-64), prev_structurals); + error_code error = scanner.finish(); + // We deliberately break down the next expression so that it is + // human readable. + const bool should_we_exit = is_streaming(partial) ? 
+ ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING + : (error != SUCCESS); // if partial is false, we must have SUCCESS + const bool have_unclosed_string = (error == UNCLOSED_STRING); + if (simdjson_unlikely(should_we_exit)) { return error; } + + if (unescaped_chars_error) { + return UNESCAPED_CHARS; + } + parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); + /*** + * The On Demand API requires special padding. + * + * This is related to https://github.com/simdjson/simdjson/issues/906 + * Basically, we want to make sure that if the parsing continues beyond the last (valid) + * structural character, it quickly stops. + * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. + * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing + * continues, then it must be [,] or }. + * Suppose it is ] or }. We backtrack to the first character, what could it be that would + * not trigger an error? It could be ] or } but no, because you can't start a document that way. + * It can't be a comma, a colon or any simple value. So the only way we could continue is + * if the repeated character is [. But if so, the document must start with [. But if the document + * starts with [, it should end with ]. If we enforce that rule, then we would get + * ][[ which is invalid. 
+ * + * This is illustrated with the test array_iterate_unclosed_error() on the following input: + * R"({ "a": [,,)" + **/ + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final + parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); + parser.structural_indexes[parser.n_structural_indexes + 2] = 0; + parser.next_structural_index = 0; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + return EMPTY; + } + if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { + return UNEXPECTED_ERROR; + } + if (partial == stage1_mode::streaming_partial) { + // If we have an unclosed string, then the last structural + // will be the quote and we want to make sure to omit it. + if(have_unclosed_string) { + parser.n_structural_indexes--; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } + } + // We truncate the input to the end of the last complete document (or zero). + auto new_structural_indexes = find_next_document_index(parser); + if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } + + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). 
+ // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. + return EMPTY; + } + } + checker.check_eof(); + return checker.errors(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +// Clear CUSTOM_BIT_INDEXER so other implementations can set it if they need to. 
+#undef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H +/* end file generic/stage1/json_structural_indexer.h for lasx */ +/* including generic/stage1/utf8_validator.h for lasx: #include */ +/* begin file generic/stage1/utf8_validator.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { +namespace stage1 { + +/** + * Validates that the string is actual UTF-8. + */ +template +bool generic_validate_utf8(const uint8_t * input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return c.errors() == error_code::SUCCESS; +} + +bool generic_validate_utf8(const char * input, size_t length) { + return generic_validate_utf8(reinterpret_cast(input),length); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H +/* end file generic/stage1/utf8_validator.h for lasx */ +/* end file generic/stage1/amalgamated.h for lasx */ +/* including generic/stage2/amalgamated.h for lasx: #include */ +/* begin file generic/stage2/amalgamated.h for lasx */ +// Stuff other things depend on +/* including generic/stage2/base.h for lasx: #include 
*/ +/* begin file generic/stage2/base.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { +namespace stage2 { + +class json_iterator; +class structural_iterator; +struct tape_builder; +struct tape_writer; + +} // namespace stage2 +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_BASE_H +/* end file generic/stage2/base.h for lasx */ +/* including generic/stage2/tape_writer.h for lasx: #include */ +/* begin file generic/stage2/tape_writer.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace lasx { +namespace { +namespace stage2 { + +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; + + /** Write a signed 64-bit value to tape. */ + simdjson_inline void append_s64(int64_t value) noexcept; + + /** Write an unsigned 64-bit value to tape. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + + /** Write a double value to tape. */ + simdjson_inline void append_double(double value) noexcept; + + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). 
+ */ + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; + + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_inline void skip() noexcept; + + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_inline void skip_large_integer() noexcept; + + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_inline void skip_double() noexcept; + + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. + */ + template + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct tape_writer + +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} + +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} + +/** Write a double value to tape. 
*/ +simdjson_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} + +simdjson_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} + +simdjson_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} + +template +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} + +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} + +} // namespace stage2 +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H +/* end file generic/stage2/tape_writer.h for lasx */ +/* including generic/stage2/logger.h for lasx: #include */ +/* begin file generic/stage2/logger.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + + +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! 
+namespace simdjson { +namespace lasx { +namespace { +namespace logger { + + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; + + static int log_depth; // Not threadsafe. Log only. + + // Helper to turn unprintable or newline characters into spaces + static simdjson_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } + } + + // Print the header and set up log_start + static simdjson_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } + } + + simdjson_unused static simdjson_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } + + // Logs a single line from the stage 2 DOM parser + template + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? 
nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i */ +/* begin file generic/stage2/json_iterator.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { +namespace stage2 { + +class json_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + uint32_t depth{0}; + + /** + * Walk the JSON document. + * + * The visitor receives callbacks when values are encountered. All callbacks pass the iterator as + * the first parameter; some callbacks have other parameters as well: + * + * - visit_document_start() - at the beginning. + * - visit_document_end() - at the end (if things were successful). + * + * - visit_array_start() - at the start `[` of a non-empty array. + * - visit_array_end() - at the end `]` of a non-empty array. + * - visit_empty_array() - when an empty array is encountered. + * + * - visit_object_end() - at the start `]` of a non-empty object. + * - visit_object_start() - at the end `]` of a non-empty object. + * - visit_empty_object() - when an empty object is encountered. 
+ * - visit_key(const uint8_t *key) - when a key in an object field is encountered. key is + * guaranteed to point at the first quote of the string (`"key"`). + * - visit_primitive(const uint8_t *value) - when a value is a string, number, boolean or null. + * - visit_root_primitive(iter, uint8_t *value) - when the top-level value is a string, number, boolean or null. + * + * - increment_count(iter) - each time a value is found in an array or object. + */ + template + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; + + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + + /** + * Look at the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; + + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. 
+ */ + simdjson_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_error(const char *error) const noexcept; + + template + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); + + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); + + // + // Read first value + // + { + auto value = advance(); + + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. 
See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } + } + goto document_end; + +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); + + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { 
log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } + +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; + +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); + +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + } + +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); + + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); + + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters 
at the end of the JSON!"); + return TAPE_ERROR; + } + + return SUCCESS; + +} // walk_document() + +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { +} + +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; +} +simdjson_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); +} + +simdjson_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { + return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} + +simdjson_inline void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} + +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} + +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} + +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, 
value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + // Use the fact that most scalars are going to be either strings or numbers. + if(*value == '"') { + return visitor.visit_string(*this, value); + } else if (((*value - '0') < 10) || (*value == '-')) { + return visitor.visit_number(*this, value); + } + // true, false, null are uncommon. + switch (*value) { + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + default: + log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } +} + +} // namespace stage2 +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H +/* end file generic/stage2/json_iterator.h for lasx */ +/* including generic/stage2/stringparsing.h for lasx: #include */ +/* begin file generic/stage2/stringparsing.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses +// It 
is intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace lasx { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr, bool allow_replacement) { + // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) + constexpr uint32_t substitution_code_point = 0xfffd; + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this to inside the + // multilingual 
plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + + } + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. 
+ if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +// handle a unicode codepoint using the wobbly convention +// https://simonsapin.github.io/wtf-8/ +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // It is not ideal that this function is nearly identical to handle_unicode_codepoint. + // + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this to inside the + // multilingual plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + uint32_t low_bit = code_point_2 - 0xdc00; + if ((low_bit >> 10) == 0) { + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + } + } + + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. 
There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_ptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. 
*/ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } +} + +simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) { + // It is not ideal that this function is nearly identical to parse_string. + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint_wobbly(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. 
*/ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H +/* end file generic/stage2/stringparsing.h for lasx */ +/* including generic/stage2/structural_iterator.h for lasx: #include */ +/* begin file generic/stage2/structural_iterator.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { +namespace stage2 { + +class structural_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + + // Start a structural + simdjson_inline structural_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { + } + // Get the buffer position of the current structural character + simdjson_inline const uint8_t* current() { + return &buf[*(next_structural-1)]; + } + // Get the current structural character + simdjson_inline char current_char() { + return buf[*(next_structural-1)]; + } + // Get the next structural character without advancing + simdjson_inline char peek_next_char() { + return buf[*next_structural]; + } + simdjson_inline const uint8_t* peek() { + return &buf[*next_structural]; + } + simdjson_inline const uint8_t* advance() { + return &buf[*(next_structural++)]; + } + simdjson_inline char advance_char() { + return 
buf[*(next_structural++)]; + } + simdjson_inline size_t remaining_len() { + return dom_parser.len - *(next_structural-1); + } + + simdjson_inline bool at_end() { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; + } + simdjson_inline bool at_beginning() { + return next_structural == dom_parser.structural_indexes.get(); + } +}; + +} // namespace stage2 +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H +/* end file generic/stage2/structural_iterator.h for lasx */ +/* including generic/stage2/tape_builder.h for lasx: #include */ +/* begin file generic/stage2/tape_builder.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace lasx { +namespace { +namespace stage2 { + +struct tape_builder { + template + simdjson_warn_unused static simdjson_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; + + /** Called when a non-empty document starts. */ + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. 
*/ + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; + + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. + */ + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; + + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. + * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. 
+ */ + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + /** Called each time a new field or element in an array or object is found. 
*/ + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; + + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; + + simdjson_inline tape_builder(dom::document &doc) noexcept; + + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; +}; // struct tape_builder + +template +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { + constexpr uint32_t start_tape_index = 0; + 
tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} + +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst, false); // We do not allow replacement when the escape characters are invalid. + if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; + } + on_end_string(dst); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { + // + // We need to make a copy to make sure that the string is space terminated. + // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. 
However, we have no control at this stage + // on how the padding was done. What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. + // + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code 
tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +// private: + +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} + +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. 
+} + +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? 
It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} + +} // namespace stage2 +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H +/* end file generic/stage2/tape_builder.h for lasx */ +/* end file generic/stage2/amalgamated.h for lasx */ + +// +// Stage 1 +// +namespace simdjson { +namespace lasx { + +simdjson_warn_unused error_code implementation::create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr& dst +) const noexcept { + dst.reset( new (std::nothrow) dom_parser_implementation() ); + if (!dst) { return MEMALLOC; } + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; + return SUCCESS; +} + +namespace { + +using namespace simd; + +simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { + // Inspired by haswell. + // LASX use low 5 bits as index. For the 6 operators (:,[]{}), the unique-5bits is [6:2]. 
+ // The ASCII white-space and operators have these values: (char, hex, unique-5bits) + // (' ', 20, 00000) ('\t', 09, 01001) ('\n', 0A, 01010) ('\r', 0D, 01101) + // (',', 2C, 01011) (':', 3A, 01110) ('[', 5B, 10110) ('{', 7B, 11110) (']', 5D, 10111) ('}', 7D, 11111) + const simd8 ws_table = simd8::repeat_16( + ' ', 0, 0, 0, 0, 0, 0, 0, 0, '\t', '\n', 0, 0, '\r', 0, 0 + ); + const simd8 op_table_lo = simd8::repeat_16( + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ',', 0, 0, ':', 0 + ); + const simd8 op_table_hi = simd8::repeat_16( + 0, 0, 0, 0, 0, 0, '[', ']', 0, 0, 0, 0, 0, 0, '{', '}' + ); + uint64_t ws = in.eq({ + in.chunks[0].lookup_16(ws_table), + in.chunks[1].lookup_16(ws_table), + }); + uint64_t op = in.eq({ + __lasx_xvshuf_b(op_table_hi, op_table_lo, in.chunks[0].shr<2>()), + __lasx_xvshuf_b(op_table_hi, op_table_lo, in.chunks[1].shr<2>()), + }); + + return { ws, op }; +} + +simdjson_inline bool is_ascii(const simd8x64& input) { + return input.reduce_or().is_ascii(); +} + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0xe0u-0x80); // Only 111_____ will be >= 0x80 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-0x80); // Only 1111____ will be >= 0x80 + return is_third_byte | is_fourth_byte; +} + +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +// +// Stage 2 +// + +// +// Implementation-specific overrides +// +namespace simdjson { +namespace lasx { + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + return lasx::stage1::json_minifier::minify<64>(buf, len, dst, dst_len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { + this->buf = _buf; + this->len = _len; + return lasx::stage1::json_structural_indexer::index<64>(buf, len, *this, streaming); +} + +simdjson_warn_unused bool 
implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return lasx::stage1::generic_validate_utf8(buf,len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept { + return lasx::stringparsing::parse_string(src, dst, allow_replacement); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept { + return lasx::stringparsing::parse_wobbly_string(src, dst); +} + +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { return error; } + return stage2(_doc); +} + +} // namespace lasx +} // namespace simdjson + +/* including simdjson/lasx/end.h: #include */ +/* begin file simdjson/lasx/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "lasx" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/lasx/end.h */ + +#endif // SIMDJSON_SRC_LASX_CPP +/* end file lasx.cpp */ +#endif +#if SIMDJSON_IMPLEMENTATION_FALLBACK +/* including fallback.cpp: #include */ +/* begin file fallback.cpp */ +#ifndef SIMDJSON_SRC_FALLBACK_CPP +#define SIMDJSON_SRC_FALLBACK_CPP + +/* amalgamation skipped (editor-only): #ifndef 
SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* including simdjson/fallback.h: #include */ +/* begin file simdjson/fallback.h */ +#ifndef SIMDJSON_FALLBACK_H +#define SIMDJSON_FALLBACK_H + +/* including simdjson/fallback/begin.h: #include "simdjson/fallback/begin.h" */ +/* begin file simdjson/fallback/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "fallback" */ +#define SIMDJSON_IMPLEMENTATION fallback +/* including simdjson/fallback/base.h: #include "simdjson/fallback/base.h" */ +/* begin file simdjson/fallback/base.h */ +#ifndef SIMDJSON_FALLBACK_BASE_H +#define SIMDJSON_FALLBACK_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Fallback implementation (runs on any machine). 
+ */ +namespace fallback { + +class implementation; + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_BASE_H +/* end file simdjson/fallback/base.h */ +/* including simdjson/fallback/bitmanipulation.h: #include "simdjson/fallback/bitmanipulation.h" */ +/* begin file simdjson/fallback/bitmanipulation.h */ +#ifndef SIMDJSON_FALLBACK_BITMANIPULATION_H +#define SIMDJSON_FALLBACK_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace { + +#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64) +static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) { + unsigned long x0 = (unsigned long)x, top, bottom; + _BitScanForward(&top, (unsigned long)(x >> 32)); + _BitScanForward(&bottom, x0); + *ret = x0 ? bottom : 32 + top; + return x != 0; +} +static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) { + unsigned long x1 = (unsigned long)(x >> 32), top, bottom; + _BitScanReverse(&top, x1); + _BitScanReverse(&bottom, (unsigned long)x); + *ret = x1 ? top + 32 : bottom; + return x != 0; +} +#endif + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#ifdef _MSC_VER + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). 
+ if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// _MSC_VER +} + +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_BITMANIPULATION_H +/* end file simdjson/fallback/bitmanipulation.h */ +/* including simdjson/fallback/stringparsing_defs.h: #include "simdjson/fallback/stringparsing_defs.h" */ +/* begin file simdjson/fallback/stringparsing_defs.h */ +#ifndef SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H +#define SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace { + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 1; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return c == '"'; } + simdjson_inline bool has_backslash() { return c == '\\'; } + simdjson_inline int quote_index() { return c == '"' ? 0 : 1; } + simdjson_inline int backslash_index() { return c == '\\' ? 
0 : 1; } + + uint8_t c; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // store to dest unconditionally - we can overwrite the bits we don't like later + dst[0] = src[0]; + return { src[0] }; +} + +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H +/* end file simdjson/fallback/stringparsing_defs.h */ +/* including simdjson/fallback/numberparsing_defs.h: #include "simdjson/fallback/numberparsing_defs.h" */ +/* begin file simdjson/fallback/numberparsing_defs.h */ +#ifndef SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H +#define SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +#ifdef JSON_TEST_NUMBERS // for unit testing +void found_invalid_number(const uint8_t *buf); +void found_integer(int64_t result, const uint8_t *buf); +void found_unsigned_integer(uint64_t result, const uint8_t *buf); +void found_float(double result, const uint8_t *buf); +#endif + +namespace simdjson { +namespace fallback { +namespace numberparsing { + +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const char *chars) { + uint64_t val; + memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + return 
parse_eight_digits_unrolled(reinterpret_cast(chars)); +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace fallback +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H +/* end file simdjson/fallback/numberparsing_defs.h */ +/* end file simdjson/fallback/begin.h */ +/* including simdjson/generic/amalgamated.h for fallback: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for fallback */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error 
simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for fallback: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for fallback */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include 
"simdjson/lasx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word +}; + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for fallback */ +/* including simdjson/generic/jsoncharutils.h for fallback: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for fallback */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace { +namespace jsoncharutils 
{ + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... 
+ //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for fallback */ +/* including simdjson/generic/atomparsing.h for fallback: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for fallback */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace fallback { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. 
+simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for fallback */ +/* including simdjson/generic/dom_parser_implementation.h for fallback: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file 
simdjson/generic/dom_parser_implementation.h for fallback */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { + +// expectation: sizeof(open_container) = 64/8. +struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) 
noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace fallback +} // namespace simdjson + +namespace simdjson { +namespace fallback { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // 
namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for fallback */ +/* including simdjson/generic/implementation_simdjson_result_base.h for fallback: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for fallback */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. 
+ */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. 
This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for fallback */ +/* including simdjson/generic/numberparsing.h for fallback: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for fallback */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace fallback { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) 
(found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." 
+#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. 
Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. 
+ const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. 
Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without + // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product + // is sufficiently accurate, and more computation is not needed. + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? 
-0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. 
+ // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. 
+ + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! 
+ // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! 
+ if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? 
-exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { + if (parse_float_fallback(src, answer)) { + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! 
+ if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer + // reference to it, it would force it to be stored in memory, preventing the compiler from + // picking it apart and putting into registers. i.e. if we pass it as reference, + // it gets slow. + double d; + error_code error = slow_float_parsing(src, &d); + writer.append_double(d); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. 
+ return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; 
} +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' 
== *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. 
+ // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it does not fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. 
+ if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. 
+ // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. 
+ if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. 
+ if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } + // If the number is negative and valid, it must be a signed integer. + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). 
+ static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. 
+ // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for fallback */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for fallback: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // 
SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} 
+template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for fallback */ +/* end file simdjson/generic/amalgamated.h for fallback */ +/* including simdjson/fallback/end.h: #include "simdjson/fallback/end.h" */ +/* begin file simdjson/fallback/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* undefining SIMDJSON_IMPLEMENTATION from "fallback" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/fallback/end.h */ + +#endif // SIMDJSON_FALLBACK_H +/* end file simdjson/fallback.h */ +/* including simdjson/fallback/implementation.h: #include */ +/* begin file simdjson/fallback/implementation.h */ +#ifndef SIMDJSON_FALLBACK_IMPLEMENTATION_H +#define SIMDJSON_FALLBACK_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation( + "fallback", + "Generic fallback implementation", + 0 + ) 
{} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_IMPLEMENTATION_H +/* end file simdjson/fallback/implementation.h */ + +/* including simdjson/fallback/begin.h: #include */ +/* begin file simdjson/fallback/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "fallback" */ +#define SIMDJSON_IMPLEMENTATION fallback +/* including simdjson/fallback/base.h: #include "simdjson/fallback/base.h" */ +/* begin file simdjson/fallback/base.h */ +#ifndef SIMDJSON_FALLBACK_BASE_H +#define SIMDJSON_FALLBACK_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Fallback implementation (runs on any machine). 
+ */ +namespace fallback { + +class implementation; + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_BASE_H +/* end file simdjson/fallback/base.h */ +/* including simdjson/fallback/bitmanipulation.h: #include "simdjson/fallback/bitmanipulation.h" */ +/* begin file simdjson/fallback/bitmanipulation.h */ +#ifndef SIMDJSON_FALLBACK_BITMANIPULATION_H +#define SIMDJSON_FALLBACK_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace { + +#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64) +static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) { + unsigned long x0 = (unsigned long)x, top, bottom; + _BitScanForward(&top, (unsigned long)(x >> 32)); + _BitScanForward(&bottom, x0); + *ret = x0 ? bottom : 32 + top; + return x != 0; +} +static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) { + unsigned long x1 = (unsigned long)(x >> 32), top, bottom; + _BitScanReverse(&top, x1); + _BitScanReverse(&bottom, (unsigned long)x); + *ret = x1 ? top + 32 : bottom; + return x != 0; +} +#endif + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#ifdef _MSC_VER + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). 
+ if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// _MSC_VER +} + +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_BITMANIPULATION_H +/* end file simdjson/fallback/bitmanipulation.h */ +/* including simdjson/fallback/stringparsing_defs.h: #include "simdjson/fallback/stringparsing_defs.h" */ +/* begin file simdjson/fallback/stringparsing_defs.h */ +#ifndef SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H +#define SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace { + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 1; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return c == '"'; } + simdjson_inline bool has_backslash() { return c == '\\'; } + simdjson_inline int quote_index() { return c == '"' ? 0 : 1; } + simdjson_inline int backslash_index() { return c == '\\' ? 
0 : 1; } + + uint8_t c; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // store to dest unconditionally - we can overwrite the bits we don't like later + dst[0] = src[0]; + return { src[0] }; +} + +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H +/* end file simdjson/fallback/stringparsing_defs.h */ +/* including simdjson/fallback/numberparsing_defs.h: #include "simdjson/fallback/numberparsing_defs.h" */ +/* begin file simdjson/fallback/numberparsing_defs.h */ +#ifndef SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H +#define SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +#ifdef JSON_TEST_NUMBERS // for unit testing +void found_invalid_number(const uint8_t *buf); +void found_integer(int64_t result, const uint8_t *buf); +void found_unsigned_integer(uint64_t result, const uint8_t *buf); +void found_float(double result, const uint8_t *buf); +#endif + +namespace simdjson { +namespace fallback { +namespace numberparsing { + +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const char *chars) { + uint64_t val; + memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + return 
parse_eight_digits_unrolled(reinterpret_cast(chars)); +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace fallback +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H +/* end file simdjson/fallback/numberparsing_defs.h */ +/* end file simdjson/fallback/begin.h */ +/* including generic/stage1/find_next_document_index.h for fallback: #include */ +/* begin file generic/stage1/find_next_document_index.h for fallback */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H + +/* amalgamation skipped (editor-only): #ifndef 
SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace { +namespace stage1 { + +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. 
+ */ +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! + if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document. + switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. 
+ return parser.n_structural_indexes; + } + return 0; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H +/* end file generic/stage1/find_next_document_index.h for fallback */ +/* including generic/stage2/stringparsing.h for fallback: #include */ +/* begin file generic/stage2/stringparsing.h for fallback */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace fallback { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. 
+ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr, bool allow_replacement) { + // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) + constexpr uint32_t substitution_code_point = 0xfffd; + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this to inside the + // multilingual plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. 
+ // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + + } + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +// handle a unicode codepoint using the wobbly convention +// https://simonsapin.github.io/wtf-8/ +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // It is not ideal that this function is nearly identical to handle_unicode_codepoint. + // + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this to inside the + // multilingual plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. 
+ if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + uint32_t low_bit = code_point_2 - 0xdc00; + if ((low_bit >> 10) == 0) { + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + } + } + + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_ptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. 
*/ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } +} + +simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) { + // It is not ideal that this function is nearly identical to parse_string. + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint_wobbly(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. 
I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H +/* end file generic/stage2/stringparsing.h for fallback */ +/* including generic/stage2/logger.h for fallback: #include */ +/* begin file generic/stage2/logger.h for fallback */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + + +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! 
+namespace simdjson { +namespace fallback { +namespace { +namespace logger { + + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; + + static int log_depth; // Not threadsafe. Log only. + + // Helper to turn unprintable or newline characters into spaces + static simdjson_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } + } + + // Print the header and set up log_start + static simdjson_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } + } + + simdjson_unused static simdjson_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } + + // Logs a single line from the stage 2 DOM parser + template + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? 
nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i */ +/* begin file generic/stage2/json_iterator.h for fallback */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace { +namespace stage2 { + +class json_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + uint32_t depth{0}; + + /** + * Walk the JSON document. + * + * The visitor receives callbacks when values are encountered. All callbacks pass the iterator as + * the first parameter; some callbacks have other parameters as well: + * + * - visit_document_start() - at the beginning. + * - visit_document_end() - at the end (if things were successful). + * + * - visit_array_start() - at the start `[` of a non-empty array. + * - visit_array_end() - at the end `]` of a non-empty array. + * - visit_empty_array() - when an empty array is encountered. + * + * - visit_object_end() - at the start `]` of a non-empty object. + * - visit_object_start() - at the end `]` of a non-empty object. + * - visit_empty_object() - when an empty object is encountered. 
+ * - visit_key(const uint8_t *key) - when a key in an object field is encountered. key is + * guaranteed to point at the first quote of the string (`"key"`). + * - visit_primitive(const uint8_t *value) - when a value is a string, number, boolean or null. + * - visit_root_primitive(iter, uint8_t *value) - when the top-level value is a string, number, boolean or null. + * + * - increment_count(iter) - each time a value is found in an array or object. + */ + template + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; + + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + + /** + * Look at the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; + + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. 
+ */ + simdjson_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_error(const char *error) const noexcept; + + template + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); + + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); + + // + // Read first value + // + { + auto value = advance(); + + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. 
See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } + } + goto document_end; + +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); + + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { 
log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } + +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; + +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); + +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + } + +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); + + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); + + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters 
at the end of the JSON!"); + return TAPE_ERROR; + } + + return SUCCESS; + +} // walk_document() + +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { +} + +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; +} +simdjson_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); +} + +simdjson_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { + return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} + +simdjson_inline void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} + +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} + +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} + +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, 
value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + // Use the fact that most scalars are going to be either strings or numbers. + if(*value == '"') { + return visitor.visit_string(*this, value); + } else if (((*value - '0') < 10) || (*value == '-')) { + return visitor.visit_number(*this, value); + } + // true, false, null are uncommon. + switch (*value) { + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + default: + log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } +} + +} // namespace stage2 +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H +/* end file generic/stage2/json_iterator.h for fallback */ +/* including generic/stage2/tape_writer.h for fallback: #include */ +/* begin file generic/stage2/tape_writer.h for fallback */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace fallback { 
+namespace { +namespace stage2 { + +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; + + /** Write a signed 64-bit value to tape. */ + simdjson_inline void append_s64(int64_t value) noexcept; + + /** Write an unsigned 64-bit value to tape. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + + /** Write a double value to tape. */ + simdjson_inline void append_double(double value) noexcept; + + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). + */ + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; + + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_inline void skip() noexcept; + + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_inline void skip_large_integer() noexcept; + + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_inline void skip_double() noexcept; + + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. + */ + template + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct tape_writer + +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} + +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} + +/** Write a double value to tape. 
*/ +simdjson_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} + +simdjson_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} + +simdjson_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} + +template +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} + +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} + +} // namespace stage2 +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H +/* end file generic/stage2/tape_writer.h for fallback */ +/* including generic/stage2/tape_builder.h for fallback: #include */ +/* begin file generic/stage2/tape_builder.h for fallback */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation 
skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace fallback { +namespace { +namespace stage2 { + +struct tape_builder { + template + simdjson_warn_unused static simdjson_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; + + /** Called when a non-empty document starts. */ + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. */ + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; + + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. + */ + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; + + /** + * Called when a string, number, boolean or null is found. 
+ */ + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. + * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. + */ + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + /** Called each time a new field or element in an array or object is found. 
*/ + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; + + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; + + simdjson_inline tape_builder(dom::document &doc) noexcept; + + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; +}; // struct tape_builder + +template +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { + constexpr uint32_t start_tape_index = 0; + 
tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} + +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst, false); // We do not allow replacement when the escape characters are invalid. + if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; + } + on_end_string(dst); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { + // + // We need to make a copy to make sure that the string is space terminated. + // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. 
However, we have no control at this stage + // on how the padding was done. What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. + // + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code 
tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +// private: + +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} + +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. 
+} + +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? 
It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} + +} // namespace stage2 +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H +/* end file generic/stage2/tape_builder.h for fallback */ + +// +// Stage 1 +// + +namespace simdjson { +namespace fallback { + +simdjson_warn_unused error_code implementation::create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr& dst +) const noexcept { + dst.reset( new (std::nothrow) fallback::dom_parser_implementation() ); + if (!dst) { return MEMALLOC; } + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; + return SUCCESS; +} + +namespace { +namespace stage1 { + +class structural_scanner { +public: + +simdjson_inline structural_scanner(dom_parser_implementation &_parser, stage1_mode _partial) + : buf{_parser.buf}, + next_structural_index{_parser.structural_indexes.get()}, + parser{_parser}, + len{static_cast(_parser.len)}, + partial{_partial} { +} + +simdjson_inline void add_structural() { + *next_structural_index = idx; + next_structural_index++; +} + +simdjson_inline bool is_continuation(uint8_t c) { + return (c & 0xc0) == 0x80; +} + +simdjson_inline void validate_utf8_character() { + // Continuation + if (simdjson_unlikely((buf[idx] & 0x40) == 0)) { + // extra continuation + error = UTF8_ERROR; + idx++; + return; + } + + // 2-byte + if ((buf[idx] & 0x20) == 0) { + // missing continuation + if (simdjson_unlikely(idx+1 > len || !is_continuation(buf[idx+1]))) { + if (idx+1 > len && is_streaming(partial)) { idx = len; return; } + error = UTF8_ERROR; + idx++; + return; + } + // overlong: 1100000_ 10______ + if (buf[idx] <= 0xc1) { error = UTF8_ERROR; } + idx += 2; + return; + } + + // 3-byte + if ((buf[idx] & 0x10) == 0) { + // missing continuation + if (simdjson_unlikely(idx+2 > len || !is_continuation(buf[idx+1]) || 
!is_continuation(buf[idx+2]))) { + if (idx+2 > len && is_streaming(partial)) { idx = len; return; } + error = UTF8_ERROR; + idx++; + return; + } + // overlong: 11100000 100_____ ________ + if (buf[idx] == 0xe0 && buf[idx+1] <= 0x9f) { error = UTF8_ERROR; } + // surrogates: U+D800-U+DFFF 11101101 101_____ + if (buf[idx] == 0xed && buf[idx+1] >= 0xa0) { error = UTF8_ERROR; } + idx += 3; + return; + } + + // 4-byte + // missing continuation + if (simdjson_unlikely(idx+3 > len || !is_continuation(buf[idx+1]) || !is_continuation(buf[idx+2]) || !is_continuation(buf[idx+3]))) { + if (idx+2 > len && is_streaming(partial)) { idx = len; return; } + error = UTF8_ERROR; + idx++; + return; + } + // overlong: 11110000 1000____ ________ ________ + if (buf[idx] == 0xf0 && buf[idx+1] <= 0x8f) { error = UTF8_ERROR; } + // too large: > U+10FFFF: + // 11110100 (1001|101_)____ + // 1111(1___|011_|0101) 10______ + // also includes 5, 6, 7 and 8 byte characters: + // 11111___ + if (buf[idx] == 0xf4 && buf[idx+1] >= 0x90) { error = UTF8_ERROR; } + if (buf[idx] >= 0xf5) { error = UTF8_ERROR; } + idx += 4; +} + +// Returns true if the string is unclosed. +simdjson_inline bool validate_string() { + idx++; // skip first quote + while (idx < len && buf[idx] != '"') { + if (buf[idx] == '\\') { + idx += 2; + } else if (simdjson_unlikely(buf[idx] & 0x80)) { + validate_utf8_character(); + } else { + if (buf[idx] < 0x20) { error = UNESCAPED_CHARS; } + idx++; + } + } + if (idx >= len) { return true; } + return false; +} + +simdjson_inline bool is_whitespace_or_operator(uint8_t c) { + switch (c) { + case '{': case '}': case '[': case ']': case ',': case ':': + case ' ': case '\r': case '\n': case '\t': + return true; + default: + return false; + } +} + +// +// Parse the entire input in STEP_SIZE-byte chunks. 
+// +simdjson_inline error_code scan() { + bool unclosed_string = false; + for (;idx 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } + parser.n_structural_indexes = new_structural_indexes; + } else if(partial == stage1_mode::streaming_final) { + if(unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). + // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. 
+ parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (parser.n_structural_indexes == 0) { return EMPTY; } + } else if(unclosed_string) { error = UNCLOSED_STRING; } + return error; +} + +private: + const uint8_t *buf; + uint32_t *next_structural_index; + dom_parser_implementation &parser; + uint32_t len; + uint32_t idx{0}; + error_code error{SUCCESS}; + stage1_mode partial; +}; // structural_scanner + +} // namespace stage1 +} // unnamed namespace + +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode partial) noexcept { + this->buf = _buf; + this->len = _len; + stage1::structural_scanner scanner(*this, partial); + return scanner.scan(); +} + +// big table for the minifier +static uint8_t jump_table[256 * 3] = { + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, + 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, +}; + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + size_t i = 0, pos = 0; + uint8_t quote = 0; + uint8_t nonescape = 1; + + while (i < len) { + unsigned char c = buf[i]; + uint8_t *meta = jump_table + 3 * c; + + quote = quote ^ (meta[0] & nonescape); + dst[pos] = c; + pos += meta[2] | quote; + + i += 1; + nonescape = uint8_t(~nonescape) | (meta[1]); + } + dst_len = pos; // we intentionally do not work with a reference + // for fear of aliasing + return quote ? 
UNCLOSED_STRING : SUCCESS; +} + +// credit: based on code from Google Fuchsia (Apache Licensed) +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + const uint8_t *data = reinterpret_cast(buf); + uint64_t pos = 0; + uint32_t code_point = 0; + while (pos < len) { + // check of the next 8 bytes are ascii. + uint64_t next_pos = pos + 16; + if (next_pos <= len) { // if it is safe to read 8 more bytes, check that they are ascii + uint64_t v1; + memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) == 0) { + pos = next_pos; + continue; + } + } + unsigned char byte = data[pos]; + if (byte < 0x80) { + pos++; + continue; + } else if ((byte & 0xe0) == 0xc0) { + next_pos = pos + 2; + if (next_pos > len) { return false; } + if ((data[pos + 1] & 0xc0) != 0x80) { return false; } + // range check + code_point = (byte & 0x1f) << 6 | (data[pos + 1] & 0x3f); + if (code_point < 0x80 || 0x7ff < code_point) { return false; } + } else if ((byte & 0xf0) == 0xe0) { + next_pos = pos + 3; + if (next_pos > len) { return false; } + if ((data[pos + 1] & 0xc0) != 0x80) { return false; } + if ((data[pos + 2] & 0xc0) != 0x80) { return false; } + // range check + code_point = (byte & 0x0f) << 12 | + (data[pos + 1] & 0x3f) << 6 | + (data[pos + 2] & 0x3f); + if (code_point < 0x800 || 0xffff < code_point || + (0xd7ff < code_point && code_point < 0xe000)) { + return false; + } + } else if ((byte & 0xf8) == 0xf0) { // 0b11110000 + next_pos = pos + 4; + if (next_pos > len) { return false; } + if ((data[pos + 1] & 0xc0) != 0x80) { return false; } + if ((data[pos + 2] & 0xc0) != 0x80) { return false; } + if ((data[pos + 3] & 0xc0) != 0x80) { return false; } + // range check + code_point = + (byte & 0x07) << 18 | (data[pos + 1] & 0x3f) << 12 | + (data[pos + 2] & 0x3f) << 6 | (data[pos + 3] & 0x3f); + if (code_point <= 0xffff || 
0x10ffff < code_point) { return false; } + } else { + // we may have a continuation + return false; + } + pos = next_pos; + } + return true; +} + +} // namespace fallback +} // namespace simdjson + +// +// Stage 2 +// + +namespace simdjson { +namespace fallback { + +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst, bool replacement_char) const noexcept { + return fallback::stringparsing::parse_string(src, dst, replacement_char); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept { + return fallback::stringparsing::parse_wobbly_string(src, dst); +} + +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { return error; } + return stage2(_doc); +} + +} // namespace fallback +} // namespace simdjson + +/* including simdjson/fallback/end.h: #include */ +/* begin file simdjson/fallback/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* undefining SIMDJSON_IMPLEMENTATION from "fallback" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/fallback/end.h */ + +#endif // SIMDJSON_SRC_FALLBACK_CPP +/* end file fallback.cpp */ +#endif +/* undefining SIMDJSON_CONDITIONAL_INCLUDE */ +#undef SIMDJSON_CONDITIONAL_INCLUDE + +SIMDJSON_POP_DISABLE_UNUSED_WARNINGS + +/* end 
file simdjson.cpp */ diff --git a/third-party/simdjson/simdjson.h b/third-party/simdjson/simdjson.h new file mode 100644 index 00000000..5b06e962 --- /dev/null +++ b/third-party/simdjson/simdjson.h @@ -0,0 +1,117769 @@ +/* auto-generated on 2024-06-11 14:08:20 -0400. Do not edit! */ +/* including simdjson.h: */ +/* begin file simdjson.h */ +#ifndef SIMDJSON_H +#define SIMDJSON_H + +/** + * @mainpage + * + * Check the [README.md](https://github.com/simdjson/simdjson/blob/master/README.md#simdjson--parsing-gigabytes-of-json-per-second). + * + * Sample code. See https://github.com/simdjson/simdjson/blob/master/doc/basics.md for more examples. + + #include "simdjson.h" + + int main(void) { + // load from `twitter.json` file: + simdjson::dom::parser parser; + simdjson::dom::element tweets = parser.load("twitter.json"); + std::cout << tweets["search_metadata"]["count"] << " results." << std::endl; + + // Parse and iterate through an array of objects + auto abstract_json = R"( [ + { "12345" : {"a":12.34, "b":56.78, "c": 9998877} }, + { "12545" : {"a":11.44, "b":12.78, "c": 11111111} } + ] )"_padded; + + for (simdjson::dom::object obj : parser.parse(abstract_json)) { + for(const auto key_value : obj) { + cout << "key: " << key_value.key << " : "; + simdjson::dom::object innerobj = key_value.value; + cout << "a: " << double(innerobj["a"]) << ", "; + cout << "b: " << double(innerobj["b"]) << ", "; + cout << "c: " << int64_t(innerobj["c"]) << endl; + } + } + } + */ + +/* including simdjson/common_defs.h: #include "simdjson/common_defs.h" */ +/* begin file simdjson/common_defs.h */ +#ifndef SIMDJSON_COMMON_DEFS_H +#define SIMDJSON_COMMON_DEFS_H + +#include +/* including simdjson/compiler_check.h: #include "simdjson/compiler_check.h" */ +/* begin file simdjson/compiler_check.h */ +#ifndef SIMDJSON_COMPILER_CHECK_H +#define SIMDJSON_COMPILER_CHECK_H + +#ifndef __cplusplus +#error simdjson requires a C++ compiler +#endif + +#ifndef SIMDJSON_CPLUSPLUS +#if defined(_MSVC_LANG) && 
!defined(__clang__) +#define SIMDJSON_CPLUSPLUS (_MSC_VER == 1900 ? 201103L : _MSVC_LANG) +#else +#define SIMDJSON_CPLUSPLUS __cplusplus +#endif +#endif + +// C++ 17 +#if !defined(SIMDJSON_CPLUSPLUS17) && (SIMDJSON_CPLUSPLUS >= 201703L) +#define SIMDJSON_CPLUSPLUS17 1 +#endif + +// C++ 14 +#if !defined(SIMDJSON_CPLUSPLUS14) && (SIMDJSON_CPLUSPLUS >= 201402L) +#define SIMDJSON_CPLUSPLUS14 1 +#endif + +// C++ 11 +#if !defined(SIMDJSON_CPLUSPLUS11) && (SIMDJSON_CPLUSPLUS >= 201103L) +#define SIMDJSON_CPLUSPLUS11 1 +#endif + +#ifndef SIMDJSON_CPLUSPLUS11 +#error simdjson requires a compiler compliant with the C++11 standard +#endif + +#ifndef SIMDJSON_IF_CONSTEXPR +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_IF_CONSTEXPR if constexpr +#else +#define SIMDJSON_IF_CONSTEXPR if +#endif +#endif + +#endif // SIMDJSON_COMPILER_CHECK_H +/* end file simdjson/compiler_check.h */ +/* including simdjson/portability.h: #include "simdjson/portability.h" */ +/* begin file simdjson/portability.h */ +#ifndef SIMDJSON_PORTABILITY_H +#define SIMDJSON_PORTABILITY_H + +#include +#include +#include +#include +#include +#ifndef _WIN32 +// strcasecmp, strncasecmp +#include +#endif + +// We are using size_t without namespace std:: throughout the project +using std::size_t; + +#ifdef _MSC_VER +#define SIMDJSON_VISUAL_STUDIO 1 +/** + * We want to differentiate carefully between + * clang under visual studio and regular visual + * studio. + * + * Under clang for Windows, we enable: + * * target pragmas so that part and only part of the + * code gets compiled for advanced instructions. 
+ * + */ +#ifdef __clang__ +// clang under visual studio +#define SIMDJSON_CLANG_VISUAL_STUDIO 1 +#else +// just regular visual studio (best guess) +#define SIMDJSON_REGULAR_VISUAL_STUDIO 1 +#endif // __clang__ +#endif // _MSC_VER + +#if (defined(__x86_64__) || defined(_M_AMD64)) && !defined(_M_ARM64EC) +#define SIMDJSON_IS_X86_64 1 +#elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) +#define SIMDJSON_IS_ARM64 1 +#elif defined(__riscv) && __riscv_xlen == 64 +#define SIMDJSON_IS_RISCV64 1 +#elif defined(__loongarch_lp64) +#define SIMDJSON_IS_LOONGARCH64 1 +#elif defined(__PPC64__) || defined(_M_PPC64) +#if defined(__ALTIVEC__) +#define SIMDJSON_IS_PPC64_VMX 1 +#endif // defined(__ALTIVEC__) +#else +#define SIMDJSON_IS_32BITS 1 + +#if defined(_M_IX86) || defined(__i386__) +#define SIMDJSON_IS_X86_32BITS 1 +#elif defined(__arm__) || defined(_M_ARM) +#define SIMDJSON_IS_ARM_32BITS 1 +#elif defined(__PPC__) || defined(_M_PPC) +#define SIMDJSON_IS_PPC_32BITS 1 +#endif + +#endif // defined(__x86_64__) || defined(_M_AMD64) +#ifndef SIMDJSON_IS_32BITS +#define SIMDJSON_IS_32BITS 0 +#endif + +#if SIMDJSON_IS_32BITS +#ifndef SIMDJSON_NO_PORTABILITY_WARNING +// In the future, we should allow programmers +// to get warning. +#endif // SIMDJSON_NO_PORTABILITY_WARNING +#endif // SIMDJSON_IS_32BITS + +#define SIMDJSON_CAT_IMPLEMENTATION_(a,...) a ## __VA_ARGS__ +#define SIMDJSON_CAT(a,...) SIMDJSON_CAT_IMPLEMENTATION_(a, __VA_ARGS__) + +#define SIMDJSON_STRINGIFY_IMPLEMENTATION_(a,...) #a SIMDJSON_STRINGIFY(__VA_ARGS__) +#define SIMDJSON_STRINGIFY(a,...) SIMDJSON_CAT_IMPLEMENTATION_(a, __VA_ARGS__) + +// this is almost standard? +#undef SIMDJSON_STRINGIFY_IMPLEMENTATION_ +#undef SIMDJSON_STRINGIFY +#define SIMDJSON_STRINGIFY_IMPLEMENTATION_(a) #a +#define SIMDJSON_STRINGIFY(a) SIMDJSON_STRINGIFY_IMPLEMENTATION_(a) + +// Our fast kernels require 64-bit systems. +// +// On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions. 
+// Furthermore, the number of SIMD registers is reduced. +// +// On 32-bit ARM, we would have smaller registers. +// +// The simdjson users should still have the fallback kernel. It is +// slower, but it should run everywhere. + +// +// Enable valid runtime implementations, and select SIMDJSON_BUILTIN_IMPLEMENTATION +// + +// We are going to use runtime dispatch. +#if SIMDJSON_IS_X86_64 +#ifdef __clang__ +// clang does not have GCC push pop +// warning: clang attribute push can't be used within a namespace in clang up +// til 8.0 so SIMDJSON_TARGET_REGION and SIMDJSON_UNTARGET_REGION must be *outside* of a +// namespace. +#define SIMDJSON_TARGET_REGION(T) \ + _Pragma(SIMDJSON_STRINGIFY( \ + clang attribute push(__attribute__((target(T))), apply_to = function))) +#define SIMDJSON_UNTARGET_REGION _Pragma("clang attribute pop") +#elif defined(__GNUC__) +// GCC is easier +#define SIMDJSON_TARGET_REGION(T) \ + _Pragma("GCC push_options") _Pragma(SIMDJSON_STRINGIFY(GCC target(T))) +#define SIMDJSON_UNTARGET_REGION _Pragma("GCC pop_options") +#endif // clang then gcc + +#endif // x86 + +// Default target region macros don't do anything. +#ifndef SIMDJSON_TARGET_REGION +#define SIMDJSON_TARGET_REGION(T) +#define SIMDJSON_UNTARGET_REGION +#endif + +// Is threading enabled? +#if defined(_REENTRANT) || defined(_MT) +#ifndef SIMDJSON_THREADS_ENABLED +#define SIMDJSON_THREADS_ENABLED +#endif +#endif + +// workaround for large stack sizes under -O0. +// https://github.com/simdjson/simdjson/issues/691 +#ifdef __APPLE__ +#ifndef __OPTIMIZE__ +// Apple systems have small stack sizes in secondary threads. +// Lack of compiler optimization may generate high stack usage. +// Users may want to disable threads for safety, but only when +// in debug mode which we detect by the fact that the __OPTIMIZE__ +// macro is not defined. 
+#undef SIMDJSON_THREADS_ENABLED +#endif +#endif + + +#if defined(__clang__) +#define SIMDJSON_NO_SANITIZE_UNDEFINED __attribute__((no_sanitize("undefined"))) +#elif defined(__GNUC__) +#define SIMDJSON_NO_SANITIZE_UNDEFINED __attribute__((no_sanitize_undefined)) +#else +#define SIMDJSON_NO_SANITIZE_UNDEFINED +#endif + + +#if defined(__clang__) || defined(__GNUC__) +#if defined(__has_feature) +# if __has_feature(memory_sanitizer) +#define SIMDJSON_NO_SANITIZE_MEMORY __attribute__((no_sanitize("memory"))) +# endif // if __has_feature(memory_sanitizer) +#endif // defined(__has_feature) +#endif +// make sure it is defined as 'nothing' if it is unapplicable. +#ifndef SIMDJSON_NO_SANITIZE_MEMORY +#define SIMDJSON_NO_SANITIZE_MEMORY +#endif + +#if SIMDJSON_VISUAL_STUDIO +// This is one case where we do not distinguish between +// regular visual studio and clang under visual studio. +// clang under Windows has _stricmp (like visual studio) but not strcasecmp (as clang normally has) +#define simdjson_strcasecmp _stricmp +#define simdjson_strncasecmp _strnicmp +#else +// The strcasecmp, strncasecmp, and strcasestr functions do not work with multibyte strings (e.g. UTF-8). +// So they are only useful for ASCII in our context. +// https://www.gnu.org/software/libunistring/manual/libunistring.html#char-_002a-strings +#define simdjson_strcasecmp strcasecmp +#define simdjson_strncasecmp strncasecmp +#endif + +#if defined(NDEBUG) || defined(__OPTIMIZE__) || (defined(_MSC_VER) && !defined(_DEBUG)) +// If NDEBUG is set, or __OPTIMIZE__ is set, or we are under MSVC in release mode, +// then do away with asserts and use __assume. 
+#if SIMDJSON_VISUAL_STUDIO +#define SIMDJSON_UNREACHABLE() __assume(0) +#define SIMDJSON_ASSUME(COND) __assume(COND) +#else +#define SIMDJSON_UNREACHABLE() __builtin_unreachable(); +#define SIMDJSON_ASSUME(COND) do { if (!(COND)) __builtin_unreachable(); } while (0) +#endif + +#else // defined(NDEBUG) || defined(__OPTIMIZE__) || (defined(_MSC_VER) && !defined(_DEBUG)) +// This should only ever be enabled in debug mode. +#define SIMDJSON_UNREACHABLE() assert(0); +#define SIMDJSON_ASSUME(COND) assert(COND) + +#endif + +#endif // SIMDJSON_PORTABILITY_H +/* end file simdjson/portability.h */ + +namespace simdjson { +namespace internal { +/** + * @private + * Our own implementation of the C++17 to_chars function. + * Defined in src/to_chars + */ +char *to_chars(char *first, const char *last, double value); +/** + * @private + * A number parsing routine. + * Defined in src/from_chars + */ +double from_chars(const char *first) noexcept; +double from_chars(const char *first, const char* end) noexcept; +} + +#ifndef SIMDJSON_EXCEPTIONS +#if __cpp_exceptions +#define SIMDJSON_EXCEPTIONS 1 +#else +#define SIMDJSON_EXCEPTIONS 0 +#endif +#endif + +} // namespace simdjson + +#if defined(__GNUC__) + // Marks a block with a name so that MCA analysis can see it. 
+ #define SIMDJSON_BEGIN_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-BEGIN " #name); + #define SIMDJSON_END_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-END " #name); + #define SIMDJSON_DEBUG_BLOCK(name, block) BEGIN_DEBUG_BLOCK(name); block; END_DEBUG_BLOCK(name); +#else + #define SIMDJSON_BEGIN_DEBUG_BLOCK(name) + #define SIMDJSON_END_DEBUG_BLOCK(name) + #define SIMDJSON_DEBUG_BLOCK(name, block) +#endif + +// Align to N-byte boundary +#define SIMDJSON_ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1)) +#define SIMDJSON_ROUNDDOWN_N(a, n) ((a) & ~((n)-1)) + +#define SIMDJSON_ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0) + +#if SIMDJSON_REGULAR_VISUAL_STUDIO + + #define simdjson_really_inline __forceinline + #define simdjson_never_inline __declspec(noinline) + + #define simdjson_unused + #define simdjson_warn_unused + + #ifndef simdjson_likely + #define simdjson_likely(x) x + #endif + #ifndef simdjson_unlikely + #define simdjson_unlikely(x) x + #endif + + #define SIMDJSON_PUSH_DISABLE_WARNINGS __pragma(warning( push )) + #define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS __pragma(warning( push, 0 )) + #define SIMDJSON_DISABLE_VS_WARNING(WARNING_NUMBER) __pragma(warning( disable : WARNING_NUMBER )) + // Get rid of Intellisense-only warnings (Code Analysis) + // Though __has_include is C++17, it is supported in Visual Studio 2017 or better (_MSC_VER>=1910). 
+ #ifdef __has_include + #if __has_include() + #include + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS SIMDJSON_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS) + #endif + #endif + + #ifndef SIMDJSON_DISABLE_UNDESIRED_WARNINGS + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS + #endif + + #define SIMDJSON_DISABLE_DEPRECATED_WARNING SIMDJSON_DISABLE_VS_WARNING(4996) + #define SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING + #define SIMDJSON_POP_DISABLE_WARNINGS __pragma(warning( pop )) + + #define SIMDJSON_PUSH_DISABLE_UNUSED_WARNINGS + #define SIMDJSON_POP_DISABLE_UNUSED_WARNINGS + +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + + #define simdjson_really_inline inline __attribute__((always_inline)) + #define simdjson_never_inline inline __attribute__((noinline)) + + #define simdjson_unused __attribute__((unused)) + #define simdjson_warn_unused __attribute__((warn_unused_result)) + + #ifndef simdjson_likely + #define simdjson_likely(x) __builtin_expect(!!(x), 1) + #endif + #ifndef simdjson_unlikely + #define simdjson_unlikely(x) __builtin_expect(!!(x), 0) + #endif + + #define SIMDJSON_PUSH_DISABLE_WARNINGS _Pragma("GCC diagnostic push") + // gcc doesn't seem to disable all warnings with all and extra, add warnings here as necessary + // We do it separately for clang since it has different warnings. + #ifdef __clang__ + // clang is missing -Wmaybe-uninitialized. 
+ #define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS SIMDJSON_PUSH_DISABLE_WARNINGS \ + SIMDJSON_DISABLE_GCC_WARNING(-Weffc++) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wall) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wconversion) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wextra) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wattributes) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wreturn-type) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wshadow) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-parameter) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-variable) + #else // __clang__ + #define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS SIMDJSON_PUSH_DISABLE_WARNINGS \ + SIMDJSON_DISABLE_GCC_WARNING(-Weffc++) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wall) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wconversion) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wextra) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wattributes) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wreturn-type) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wshadow) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-parameter) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-variable) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wformat-security) + #endif // __clang__ + + #define SIMDJSON_PRAGMA(P) _Pragma(#P) + #define SIMDJSON_DISABLE_GCC_WARNING(WARNING) SIMDJSON_PRAGMA(GCC diagnostic ignored #WARNING) + #if SIMDJSON_CLANG_VISUAL_STUDIO + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS SIMDJSON_DISABLE_GCC_WARNING(-Wmicrosoft-include) + #else + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS + #endif + #define SIMDJSON_DISABLE_DEPRECATED_WARNING SIMDJSON_DISABLE_GCC_WARNING(-Wdeprecated-declarations) + #define SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING SIMDJSON_DISABLE_GCC_WARNING(-Wstrict-overflow) + #define SIMDJSON_POP_DISABLE_WARNINGS _Pragma("GCC diagnostic pop") + + #define 
SIMDJSON_PUSH_DISABLE_UNUSED_WARNINGS SIMDJSON_PUSH_DISABLE_WARNINGS \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused) + #define SIMDJSON_POP_DISABLE_UNUSED_WARNINGS SIMDJSON_POP_DISABLE_WARNINGS + + + +#endif // MSC_VER + +#if defined(simdjson_inline) + // Prefer the user's definition of simdjson_inline; don't define it ourselves. +#elif defined(__GNUC__) && !defined(__OPTIMIZE__) + // If optimizations are disabled, forcing inlining can lead to significant + // code bloat and high compile times. Don't use simdjson_really_inline for + // unoptimized builds. + #define simdjson_inline inline +#else + // Force inlining for most simdjson functions. + #define simdjson_inline simdjson_really_inline +#endif + +#if SIMDJSON_VISUAL_STUDIO + /** + * Windows users need to do some extra work when building + * or using a dynamic library (DLL). When building, we need + * to set SIMDJSON_DLLIMPORTEXPORT to __declspec(dllexport). + * When *using* the DLL, the user needs to set + * SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport). + * + * Static libraries not need require such work. + * + * It does not matter here whether you are using + * the regular visual studio or clang under visual + * studio, you still need to handle these issues. + * + * Non-Windows systems do not have this complexity. + */ + #if SIMDJSON_BUILDING_WINDOWS_DYNAMIC_LIBRARY + // We set SIMDJSON_BUILDING_WINDOWS_DYNAMIC_LIBRARY when we build a DLL under Windows. + // It should never happen that both SIMDJSON_BUILDING_WINDOWS_DYNAMIC_LIBRARY and + // SIMDJSON_USING_WINDOWS_DYNAMIC_LIBRARY are set. + #define SIMDJSON_DLLIMPORTEXPORT __declspec(dllexport) + #elif SIMDJSON_USING_WINDOWS_DYNAMIC_LIBRARY + // Windows user who call a dynamic library should set SIMDJSON_USING_WINDOWS_DYNAMIC_LIBRARY to 1. + #define SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport) + #else + // We assume by default static linkage + #define SIMDJSON_DLLIMPORTEXPORT + #endif + +/** + * Workaround for the vcpkg package manager. 
Only vcpkg should + * ever touch the next line. The SIMDJSON_USING_LIBRARY macro is otherwise unused. + */ +#if SIMDJSON_USING_LIBRARY +#define SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport) +#endif +/** + * End of workaround for the vcpkg package manager. + */ +#else + #define SIMDJSON_DLLIMPORTEXPORT +#endif + +// C++17 requires string_view. +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_HAS_STRING_VIEW +#include // by the standard, this has to be safe. +#endif + +// This macro (__cpp_lib_string_view) has to be defined +// for C++17 and better, but if it is otherwise defined, +// we are going to assume that string_view is available +// even if we do not have C++17 support. +#ifdef __cpp_lib_string_view +#define SIMDJSON_HAS_STRING_VIEW +#endif + +// Some systems have string_view even if we do not have C++17 support, +// and even if __cpp_lib_string_view is undefined, it is the case +// with Apple clang version 11. +// We must handle it. *This is important.* +#ifndef SIMDJSON_HAS_STRING_VIEW +#if defined __has_include +// do not combine the next #if with the previous one (unsafe) +#if __has_include () +// now it is safe to trigger the include +#include // though the file is there, it does not follow that we got the implementation +#if defined(_LIBCPP_STRING_VIEW) +// Ah! So we under libc++ which under its Library Fundamentals Technical Specification, which preceded C++17, +// included string_view. +// This means that we have string_view *even though* we may not have C++17. +#define SIMDJSON_HAS_STRING_VIEW +#endif // _LIBCPP_STRING_VIEW +#endif // __has_include () +#endif // defined __has_include +#endif // def SIMDJSON_HAS_STRING_VIEW +// end of complicated but important routine to try to detect string_view. + +// +// Backfill std::string_view using nonstd::string_view on systems where +// we expect that string_view is missing. Important: if we get this wrong, +// we will end up with two string_view definitions and potential trouble. 
+// That is why we work so hard above to avoid it. +// +#ifndef SIMDJSON_HAS_STRING_VIEW +SIMDJSON_PUSH_DISABLE_ALL_WARNINGS +/* including simdjson/nonstd/string_view.hpp: #include "simdjson/nonstd/string_view.hpp" */ +/* begin file simdjson/nonstd/string_view.hpp */ +// Copyright 2017-2020 by Martin Moene +// +// string-view lite, a C++17-like string_view for C++98 and later. +// For more information see https://github.com/martinmoene/string-view-lite +// +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#pragma once + +#ifndef NONSTD_SV_LITE_H_INCLUDED +#define NONSTD_SV_LITE_H_INCLUDED + +#define string_view_lite_MAJOR 1 +#define string_view_lite_MINOR 8 +#define string_view_lite_PATCH 0 + +#define string_view_lite_VERSION nssv_STRINGIFY(string_view_lite_MAJOR) "." nssv_STRINGIFY(string_view_lite_MINOR) "." nssv_STRINGIFY(string_view_lite_PATCH) + +#define nssv_STRINGIFY( x ) nssv_STRINGIFY_( x ) +#define nssv_STRINGIFY_( x ) #x + +// string-view lite configuration: + +#define nssv_STRING_VIEW_DEFAULT 0 +#define nssv_STRING_VIEW_NONSTD 1 +#define nssv_STRING_VIEW_STD 2 + +// tweak header support: + +#ifdef __has_include +# if __has_include() +# include +# endif +#define nssv_HAVE_TWEAK_HEADER 1 +#else +#define nssv_HAVE_TWEAK_HEADER 0 +//# pragma message("string_view.hpp: Note: Tweak header not supported.") +#endif + +// string_view selection and configuration: + +#if !defined( nssv_CONFIG_SELECT_STRING_VIEW ) +# define nssv_CONFIG_SELECT_STRING_VIEW ( nssv_HAVE_STD_STRING_VIEW ? 
nssv_STRING_VIEW_STD : nssv_STRING_VIEW_NONSTD ) +#endif + +#ifndef nssv_CONFIG_STD_SV_OPERATOR +# define nssv_CONFIG_STD_SV_OPERATOR 0 +#endif + +#ifndef nssv_CONFIG_USR_SV_OPERATOR +# define nssv_CONFIG_USR_SV_OPERATOR 1 +#endif + +#ifdef nssv_CONFIG_CONVERSION_STD_STRING +# define nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS nssv_CONFIG_CONVERSION_STD_STRING +# define nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS nssv_CONFIG_CONVERSION_STD_STRING +#endif + +#ifndef nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS +# define nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS 1 +#endif + +#ifndef nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS +# define nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS 1 +#endif + +#ifndef nssv_CONFIG_NO_STREAM_INSERTION +# define nssv_CONFIG_NO_STREAM_INSERTION 0 +#endif + +#ifndef nssv_CONFIG_CONSTEXPR11_STD_SEARCH +# define nssv_CONFIG_CONSTEXPR11_STD_SEARCH 1 +#endif + +// Control presence of exception handling (try and auto discover): + +#ifndef nssv_CONFIG_NO_EXCEPTIONS +# if defined(_MSC_VER) +# include // for _HAS_EXCEPTIONS +# endif +# if defined(__cpp_exceptions) || defined(__EXCEPTIONS) || (_HAS_EXCEPTIONS) +# define nssv_CONFIG_NO_EXCEPTIONS 0 +# else +# define nssv_CONFIG_NO_EXCEPTIONS 1 +# endif +#endif + +// C++ language version detection (C++23 is speculative): +// Note: VC14.0/1900 (VS2015) lacks too much from C++14. + +#ifndef nssv_CPLUSPLUS +# if defined(_MSVC_LANG ) && !defined(__clang__) +# define nssv_CPLUSPLUS (_MSC_VER == 1900 ? 
201103L : _MSVC_LANG ) +# else +# define nssv_CPLUSPLUS __cplusplus +# endif +#endif + +#define nssv_CPP98_OR_GREATER ( nssv_CPLUSPLUS >= 199711L ) +#define nssv_CPP11_OR_GREATER ( nssv_CPLUSPLUS >= 201103L ) +#define nssv_CPP11_OR_GREATER_ ( nssv_CPLUSPLUS >= 201103L ) +#define nssv_CPP14_OR_GREATER ( nssv_CPLUSPLUS >= 201402L ) +#define nssv_CPP17_OR_GREATER ( nssv_CPLUSPLUS >= 201703L ) +#define nssv_CPP20_OR_GREATER ( nssv_CPLUSPLUS >= 202002L ) +#define nssv_CPP23_OR_GREATER ( nssv_CPLUSPLUS >= 202300L ) + +// use C++17 std::string_view if available and requested: + +#if nssv_CPP17_OR_GREATER && defined(__has_include ) +# if __has_include( ) +# define nssv_HAVE_STD_STRING_VIEW 1 +# else +# define nssv_HAVE_STD_STRING_VIEW 0 +# endif +#else +# define nssv_HAVE_STD_STRING_VIEW 0 +#endif + +#define nssv_USES_STD_STRING_VIEW ( (nssv_CONFIG_SELECT_STRING_VIEW == nssv_STRING_VIEW_STD) || ((nssv_CONFIG_SELECT_STRING_VIEW == nssv_STRING_VIEW_DEFAULT) && nssv_HAVE_STD_STRING_VIEW) ) + +#define nssv_HAVE_STARTS_WITH ( nssv_CPP20_OR_GREATER || !nssv_USES_STD_STRING_VIEW ) +#define nssv_HAVE_ENDS_WITH nssv_HAVE_STARTS_WITH + +// +// Use C++17 std::string_view: +// + +#if nssv_USES_STD_STRING_VIEW + +#include + +// Extensions for std::string: + +#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS + +#include + +namespace nonstd { + +template< class CharT, class Traits, class Allocator = std::allocator > +std::basic_string +to_string( std::basic_string_view v, Allocator const & a = Allocator() ) +{ + return std::basic_string( v.begin(), v.end(), a ); +} + +template< class CharT, class Traits, class Allocator > +std::basic_string_view +to_string_view( std::basic_string const & s ) +{ + return std::basic_string_view( s.data(), s.size() ); +} + +// Literal operators sv and _sv: + +#if nssv_CONFIG_STD_SV_OPERATOR + +using namespace std::literals::string_view_literals; + +#endif + +#if nssv_CONFIG_USR_SV_OPERATOR + +inline namespace literals { +inline namespace 
string_view_literals { + + +constexpr std::string_view operator "" _sv( const char* str, size_t len ) noexcept // (1) +{ + return std::string_view{ str, len }; +} + +constexpr std::u16string_view operator "" _sv( const char16_t* str, size_t len ) noexcept // (2) +{ + return std::u16string_view{ str, len }; +} + +constexpr std::u32string_view operator "" _sv( const char32_t* str, size_t len ) noexcept // (3) +{ + return std::u32string_view{ str, len }; +} + +constexpr std::wstring_view operator "" _sv( const wchar_t* str, size_t len ) noexcept // (4) +{ + return std::wstring_view{ str, len }; +} + +}} // namespace literals::string_view_literals + +#endif // nssv_CONFIG_USR_SV_OPERATOR + +} // namespace nonstd + +#endif // nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS + +namespace nonstd { + +using std::string_view; +using std::wstring_view; +using std::u16string_view; +using std::u32string_view; +using std::basic_string_view; + +// literal "sv" and "_sv", see above + +using std::operator==; +using std::operator!=; +using std::operator<; +using std::operator<=; +using std::operator>; +using std::operator>=; + +using std::operator<<; + +} // namespace nonstd + +#else // nssv_HAVE_STD_STRING_VIEW + +// +// Before C++17: use string_view lite: +// + +// Compiler versions: +// +// MSVC++ 6.0 _MSC_VER == 1200 nssv_COMPILER_MSVC_VERSION == 60 (Visual Studio 6.0) +// MSVC++ 7.0 _MSC_VER == 1300 nssv_COMPILER_MSVC_VERSION == 70 (Visual Studio .NET 2002) +// MSVC++ 7.1 _MSC_VER == 1310 nssv_COMPILER_MSVC_VERSION == 71 (Visual Studio .NET 2003) +// MSVC++ 8.0 _MSC_VER == 1400 nssv_COMPILER_MSVC_VERSION == 80 (Visual Studio 2005) +// MSVC++ 9.0 _MSC_VER == 1500 nssv_COMPILER_MSVC_VERSION == 90 (Visual Studio 2008) +// MSVC++ 10.0 _MSC_VER == 1600 nssv_COMPILER_MSVC_VERSION == 100 (Visual Studio 2010) +// MSVC++ 11.0 _MSC_VER == 1700 nssv_COMPILER_MSVC_VERSION == 110 (Visual Studio 2012) +// MSVC++ 12.0 _MSC_VER == 1800 nssv_COMPILER_MSVC_VERSION == 120 (Visual Studio 2013) 
+// MSVC++ 14.0 _MSC_VER == 1900 nssv_COMPILER_MSVC_VERSION == 140 (Visual Studio 2015) +// MSVC++ 14.1 _MSC_VER >= 1910 nssv_COMPILER_MSVC_VERSION == 141 (Visual Studio 2017) +// MSVC++ 14.2 _MSC_VER >= 1920 nssv_COMPILER_MSVC_VERSION == 142 (Visual Studio 2019) + +#if defined(_MSC_VER ) && !defined(__clang__) +# define nssv_COMPILER_MSVC_VER (_MSC_VER ) +# define nssv_COMPILER_MSVC_VERSION (_MSC_VER / 10 - 10 * ( 5 + (_MSC_VER < 1900 ) ) ) +#else +# define nssv_COMPILER_MSVC_VER 0 +# define nssv_COMPILER_MSVC_VERSION 0 +#endif + +#define nssv_COMPILER_VERSION( major, minor, patch ) ( 10 * ( 10 * (major) + (minor) ) + (patch) ) + +#if defined( __apple_build_version__ ) +# define nssv_COMPILER_APPLECLANG_VERSION nssv_COMPILER_VERSION(__clang_major__, __clang_minor__, __clang_patchlevel__) +# define nssv_COMPILER_CLANG_VERSION 0 +#elif defined( __clang__ ) +# define nssv_COMPILER_APPLECLANG_VERSION 0 +# define nssv_COMPILER_CLANG_VERSION nssv_COMPILER_VERSION(__clang_major__, __clang_minor__, __clang_patchlevel__) +#else +# define nssv_COMPILER_APPLECLANG_VERSION 0 +# define nssv_COMPILER_CLANG_VERSION 0 +#endif + +#if defined(__GNUC__) && !defined(__clang__) +# define nssv_COMPILER_GNUC_VERSION nssv_COMPILER_VERSION(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#else +# define nssv_COMPILER_GNUC_VERSION 0 +#endif + +// half-open range [lo..hi): +#define nssv_BETWEEN( v, lo, hi ) ( (lo) <= (v) && (v) < (hi) ) + +// Presence of language and library features: + +#ifdef _HAS_CPP0X +# define nssv_HAS_CPP0X _HAS_CPP0X +#else +# define nssv_HAS_CPP0X 0 +#endif + +// Unless defined otherwise below, consider VC14 as C++11 for string-view-lite: + +#if nssv_COMPILER_MSVC_VER >= 1900 +# undef nssv_CPP11_OR_GREATER +# define nssv_CPP11_OR_GREATER 1 +#endif + +#define nssv_CPP11_90 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1500) +#define nssv_CPP11_100 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1600) +#define nssv_CPP11_110 (nssv_CPP11_OR_GREATER_ || 
nssv_COMPILER_MSVC_VER >= 1700) +#define nssv_CPP11_120 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1800) +#define nssv_CPP11_140 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1900) +#define nssv_CPP11_141 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1910) + +#define nssv_CPP14_000 (nssv_CPP14_OR_GREATER) +#define nssv_CPP17_000 (nssv_CPP17_OR_GREATER) + +// Presence of C++11 language features: + +#define nssv_HAVE_CONSTEXPR_11 nssv_CPP11_140 +#define nssv_HAVE_EXPLICIT_CONVERSION nssv_CPP11_140 +#define nssv_HAVE_INLINE_NAMESPACE nssv_CPP11_140 +#define nssv_HAVE_IS_DEFAULT nssv_CPP11_140 +#define nssv_HAVE_IS_DELETE nssv_CPP11_140 +#define nssv_HAVE_NOEXCEPT nssv_CPP11_140 +#define nssv_HAVE_NULLPTR nssv_CPP11_100 +#define nssv_HAVE_REF_QUALIFIER nssv_CPP11_140 +#define nssv_HAVE_UNICODE_LITERALS nssv_CPP11_140 +#define nssv_HAVE_USER_DEFINED_LITERALS nssv_CPP11_140 +#define nssv_HAVE_WCHAR16_T nssv_CPP11_100 +#define nssv_HAVE_WCHAR32_T nssv_CPP11_100 + +#if ! ( ( nssv_CPP11_OR_GREATER && nssv_COMPILER_CLANG_VERSION ) || nssv_BETWEEN( nssv_COMPILER_CLANG_VERSION, 300, 400 ) ) +# define nssv_HAVE_STD_DEFINED_LITERALS nssv_CPP11_140 +#else +# define nssv_HAVE_STD_DEFINED_LITERALS 0 +#endif + +// Presence of C++14 language features: + +#define nssv_HAVE_CONSTEXPR_14 nssv_CPP14_000 + +// Presence of C++17 language features: + +#define nssv_HAVE_NODISCARD nssv_CPP17_000 + +// Presence of C++ library features: + +#define nssv_HAVE_STD_HASH nssv_CPP11_120 + +// Presence of compiler intrinsics: + +// Providing char-type specializations for compare() and length() that +// use compiler intrinsics can improve compile- and run-time performance. +// +// The challenge is in using the right combinations of builtin availability +// and its constexpr-ness. +// +// | compiler | __builtin_memcmp (constexpr) | memcmp (constexpr) | +// |----------|------------------------------|---------------------| +// | clang | 4.0 (>= 4.0 ) | any (? 
) | +// | clang-a | 9.0 (>= 9.0 ) | any (? ) | +// | gcc | any (constexpr) | any (? ) | +// | msvc | >= 14.2 C++17 (>= 14.2 ) | any (? ) | + +#define nssv_HAVE_BUILTIN_VER ( (nssv_CPP17_000 && nssv_COMPILER_MSVC_VERSION >= 142) || nssv_COMPILER_GNUC_VERSION > 0 || nssv_COMPILER_CLANG_VERSION >= 400 || nssv_COMPILER_APPLECLANG_VERSION >= 900 ) +#define nssv_HAVE_BUILTIN_CE ( nssv_HAVE_BUILTIN_VER ) + +#define nssv_HAVE_BUILTIN_MEMCMP ( (nssv_HAVE_CONSTEXPR_14 && nssv_HAVE_BUILTIN_CE) || !nssv_HAVE_CONSTEXPR_14 ) +#define nssv_HAVE_BUILTIN_STRLEN ( (nssv_HAVE_CONSTEXPR_11 && nssv_HAVE_BUILTIN_CE) || !nssv_HAVE_CONSTEXPR_11 ) + +#ifdef __has_builtin +# define nssv_HAVE_BUILTIN( x ) __has_builtin( x ) +#else +# define nssv_HAVE_BUILTIN( x ) 0 +#endif + +#if nssv_HAVE_BUILTIN(__builtin_memcmp) || nssv_HAVE_BUILTIN_VER +# define nssv_BUILTIN_MEMCMP __builtin_memcmp +#else +# define nssv_BUILTIN_MEMCMP memcmp +#endif + +#if nssv_HAVE_BUILTIN(__builtin_strlen) || nssv_HAVE_BUILTIN_VER +# define nssv_BUILTIN_STRLEN __builtin_strlen +#else +# define nssv_BUILTIN_STRLEN strlen +#endif + +// C++ feature usage: + +#if nssv_HAVE_CONSTEXPR_11 +# define nssv_constexpr constexpr +#else +# define nssv_constexpr /*constexpr*/ +#endif + +#if nssv_HAVE_CONSTEXPR_14 +# define nssv_constexpr14 constexpr +#else +# define nssv_constexpr14 /*constexpr*/ +#endif + +#if nssv_HAVE_EXPLICIT_CONVERSION +# define nssv_explicit explicit +#else +# define nssv_explicit /*explicit*/ +#endif + +#if nssv_HAVE_INLINE_NAMESPACE +# define nssv_inline_ns inline +#else +# define nssv_inline_ns /*inline*/ +#endif + +#if nssv_HAVE_NOEXCEPT +# define nssv_noexcept noexcept +#else +# define nssv_noexcept /*noexcept*/ +#endif + +//#if nssv_HAVE_REF_QUALIFIER +//# define nssv_ref_qual & +//# define nssv_refref_qual && +//#else +//# define nssv_ref_qual /*&*/ +//# define nssv_refref_qual /*&&*/ +//#endif + +#if nssv_HAVE_NULLPTR +# define nssv_nullptr nullptr +#else +# define nssv_nullptr NULL +#endif + +#if 
nssv_HAVE_NODISCARD +# define nssv_nodiscard [[nodiscard]] +#else +# define nssv_nodiscard /*[[nodiscard]]*/ +#endif + +// Additional includes: + +#include +#include +#include +#include +#include // std::char_traits<> + +#if ! nssv_CONFIG_NO_STREAM_INSERTION +# include +#endif + +#if ! nssv_CONFIG_NO_EXCEPTIONS +# include +#endif + +#if nssv_CPP11_OR_GREATER +# include +#endif + +// Clang, GNUC, MSVC warning suppression macros: + +#if defined(__clang__) +# pragma clang diagnostic ignored "-Wreserved-user-defined-literal" +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wuser-defined-literals" +#elif nssv_COMPILER_GNUC_VERSION >= 480 +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wliteral-suffix" +#endif // __clang__ + +#if nssv_COMPILER_MSVC_VERSION >= 140 +# define nssv_SUPPRESS_MSGSL_WARNING(expr) [[gsl::suppress(expr)]] +# define nssv_SUPPRESS_MSVC_WARNING(code, descr) __pragma(warning(suppress: code) ) +# define nssv_DISABLE_MSVC_WARNINGS(codes) __pragma(warning(push)) __pragma(warning(disable: codes)) +#else +# define nssv_SUPPRESS_MSGSL_WARNING(expr) +# define nssv_SUPPRESS_MSVC_WARNING(code, descr) +# define nssv_DISABLE_MSVC_WARNINGS(codes) +#endif + +#if defined(__clang__) +# define nssv_RESTORE_WARNINGS() _Pragma("clang diagnostic pop") +#elif nssv_COMPILER_GNUC_VERSION >= 480 +# define nssv_RESTORE_WARNINGS() _Pragma("GCC diagnostic pop") +#elif nssv_COMPILER_MSVC_VERSION >= 140 +# define nssv_RESTORE_WARNINGS() __pragma(warning(pop )) +#else +# define nssv_RESTORE_WARNINGS() +#endif + +// Suppress the following MSVC (GSL) warnings: +// - C4455, non-gsl : 'operator ""sv': literal suffix identifiers that do not +// start with an underscore are reserved +// - C26472, gsl::t.1 : don't use a static_cast for arithmetic conversions; +// use brace initialization, gsl::narrow_cast or gsl::narow +// - C26481: gsl::b.1 : don't use pointer arithmetic. 
Use span instead + +nssv_DISABLE_MSVC_WARNINGS( 4455 26481 26472 ) +//nssv_DISABLE_CLANG_WARNINGS( "-Wuser-defined-literals" ) +//nssv_DISABLE_GNUC_WARNINGS( -Wliteral-suffix ) + +namespace nonstd { namespace sv_lite { + +// +// basic_string_view declaration: +// + +template +< + class CharT, + class Traits = std::char_traits +> +class basic_string_view; + +namespace detail { + +// support constexpr comparison in C++14; +// for C++17 and later, use provided traits: + +template< typename CharT > +inline nssv_constexpr14 int compare( CharT const * s1, CharT const * s2, std::size_t count ) +{ + while ( count-- != 0 ) + { + if ( *s1 < *s2 ) return -1; + if ( *s1 > *s2 ) return +1; + ++s1; ++s2; + } + return 0; +} + +#if nssv_HAVE_BUILTIN_MEMCMP + +// specialization of compare() for char, see also generic compare() above: + +inline nssv_constexpr14 int compare( char const * s1, char const * s2, std::size_t count ) +{ + return nssv_BUILTIN_MEMCMP( s1, s2, count ); +} + +#endif + +#if nssv_HAVE_BUILTIN_STRLEN + +// specialization of length() for char, see also generic length() further below: + +inline nssv_constexpr std::size_t length( char const * s ) +{ + return nssv_BUILTIN_STRLEN( s ); +} + +#endif + +#if defined(__OPTIMIZE__) + +// gcc, clang provide __OPTIMIZE__ +// Expect tail call optimization to make length() non-recursive: + +template< typename CharT > +inline nssv_constexpr std::size_t length( CharT * s, std::size_t result = 0 ) +{ + return *s == '\0' ? result : length( s + 1, result + 1 ); +} + +#else // OPTIMIZE + +// non-recursive: + +template< typename CharT > +inline nssv_constexpr14 std::size_t length( CharT * s ) +{ + std::size_t result = 0; + while ( *s++ != '\0' ) + { + ++result; + } + return result; +} + +#endif // OPTIMIZE + +#if nssv_CPP11_OR_GREATER && ! 
nssv_CPP17_OR_GREATER +#if defined(__OPTIMIZE__) + +// gcc, clang provide __OPTIMIZE__ +// Expect tail call optimization to make search() non-recursive: + +template< class CharT, class Traits = std::char_traits > +constexpr const CharT* search( basic_string_view haystack, basic_string_view needle ) +{ + return haystack.starts_with( needle ) ? haystack.begin() : + haystack.empty() ? haystack.end() : search( haystack.substr(1), needle ); +} + +#else // OPTIMIZE + +// non-recursive: + +#if nssv_CONFIG_CONSTEXPR11_STD_SEARCH + +template< class CharT, class Traits = std::char_traits > +constexpr const CharT* search( basic_string_view haystack, basic_string_view needle ) +{ + return std::search( haystack.begin(), haystack.end(), needle.begin(), needle.end() ); +} + +#else // nssv_CONFIG_CONSTEXPR11_STD_SEARCH + +template< class CharT, class Traits = std::char_traits > +nssv_constexpr14 const CharT* search( basic_string_view haystack, basic_string_view needle ) +{ + while ( needle.size() <= haystack.size() ) + { + if ( haystack.starts_with(needle) ) + { + return haystack.cbegin(); + } + haystack = basic_string_view{ haystack.begin() + 1, haystack.size() - 1U }; + } + return haystack.cend(); +} +#endif // nssv_CONFIG_CONSTEXPR11_STD_SEARCH + +#endif // OPTIMIZE +#endif // nssv_CPP11_OR_GREATER && ! 
nssv_CPP17_OR_GREATER + +} // namespace detail + +// +// basic_string_view: +// + +template +< + class CharT, + class Traits /* = std::char_traits */ +> +class basic_string_view +{ +public: + // Member types: + + typedef Traits traits_type; + typedef CharT value_type; + + typedef CharT * pointer; + typedef CharT const * const_pointer; + typedef CharT & reference; + typedef CharT const & const_reference; + + typedef const_pointer iterator; + typedef const_pointer const_iterator; + typedef std::reverse_iterator< const_iterator > reverse_iterator; + typedef std::reverse_iterator< const_iterator > const_reverse_iterator; + + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + + // 24.4.2.1 Construction and assignment: + + nssv_constexpr basic_string_view() nssv_noexcept + : data_( nssv_nullptr ) + , size_( 0 ) + {} + +#if nssv_CPP11_OR_GREATER + nssv_constexpr basic_string_view( basic_string_view const & other ) nssv_noexcept = default; +#else + nssv_constexpr basic_string_view( basic_string_view const & other ) nssv_noexcept + : data_( other.data_) + , size_( other.size_) + {} +#endif + + nssv_constexpr basic_string_view( CharT const * s, size_type count ) nssv_noexcept // non-standard noexcept + : data_( s ) + , size_( count ) + {} + + nssv_constexpr basic_string_view( CharT const * s) nssv_noexcept // non-standard noexcept + : data_( s ) +#if nssv_CPP17_OR_GREATER + , size_( Traits::length(s) ) +#elif nssv_CPP11_OR_GREATER + , size_( detail::length(s) ) +#else + , size_( Traits::length(s) ) +#endif + {} + +#if nssv_HAVE_NULLPTR +# if nssv_HAVE_IS_DELETE + nssv_constexpr basic_string_view( std::nullptr_t ) nssv_noexcept = delete; +# else + private: nssv_constexpr basic_string_view( std::nullptr_t ) nssv_noexcept; public: +# endif +#endif + + // Assignment: + +#if nssv_CPP11_OR_GREATER + nssv_constexpr14 basic_string_view & operator=( basic_string_view const & other ) nssv_noexcept = default; +#else + nssv_constexpr14 basic_string_view & 
operator=( basic_string_view const & other ) nssv_noexcept + { + data_ = other.data_; + size_ = other.size_; + return *this; + } +#endif + + // 24.4.2.2 Iterator support: + + nssv_constexpr const_iterator begin() const nssv_noexcept { return data_; } + nssv_constexpr const_iterator end() const nssv_noexcept { return data_ + size_; } + + nssv_constexpr const_iterator cbegin() const nssv_noexcept { return begin(); } + nssv_constexpr const_iterator cend() const nssv_noexcept { return end(); } + + nssv_constexpr const_reverse_iterator rbegin() const nssv_noexcept { return const_reverse_iterator( end() ); } + nssv_constexpr const_reverse_iterator rend() const nssv_noexcept { return const_reverse_iterator( begin() ); } + + nssv_constexpr const_reverse_iterator crbegin() const nssv_noexcept { return rbegin(); } + nssv_constexpr const_reverse_iterator crend() const nssv_noexcept { return rend(); } + + // 24.4.2.3 Capacity: + + nssv_constexpr size_type size() const nssv_noexcept { return size_; } + nssv_constexpr size_type length() const nssv_noexcept { return size_; } + nssv_constexpr size_type max_size() const nssv_noexcept { return (std::numeric_limits< size_type >::max)(); } + + // since C++20 + nssv_nodiscard nssv_constexpr bool empty() const nssv_noexcept + { + return 0 == size_; + } + + // 24.4.2.4 Element access: + + nssv_constexpr const_reference operator[]( size_type pos ) const + { + return data_at( pos ); + } + + nssv_constexpr14 const_reference at( size_type pos ) const + { +#if nssv_CONFIG_NO_EXCEPTIONS + assert( pos < size() ); +#else + if ( pos >= size() ) + { + throw std::out_of_range("nonstd::string_view::at()"); + } +#endif + return data_at( pos ); + } + + nssv_constexpr const_reference front() const { return data_at( 0 ); } + nssv_constexpr const_reference back() const { return data_at( size() - 1 ); } + + nssv_constexpr const_pointer data() const nssv_noexcept { return data_; } + + // 24.4.2.5 Modifiers: + + nssv_constexpr14 void remove_prefix( 
size_type n ) + { + assert( n <= size() ); + data_ += n; + size_ -= n; + } + + nssv_constexpr14 void remove_suffix( size_type n ) + { + assert( n <= size() ); + size_ -= n; + } + + nssv_constexpr14 void swap( basic_string_view & other ) nssv_noexcept + { + const basic_string_view tmp(other); + other = *this; + *this = tmp; + } + + // 24.4.2.6 String operations: + + size_type copy( CharT * dest, size_type n, size_type pos = 0 ) const + { +#if nssv_CONFIG_NO_EXCEPTIONS + assert( pos <= size() ); +#else + if ( pos > size() ) + { + throw std::out_of_range("nonstd::string_view::copy()"); + } +#endif + const size_type rlen = (std::min)( n, size() - pos ); + + (void) Traits::copy( dest, data() + pos, rlen ); + + return rlen; + } + + nssv_constexpr14 basic_string_view substr( size_type pos = 0, size_type n = npos ) const + { +#if nssv_CONFIG_NO_EXCEPTIONS + assert( pos <= size() ); +#else + if ( pos > size() ) + { + throw std::out_of_range("nonstd::string_view::substr()"); + } +#endif + return basic_string_view( data() + pos, (std::min)( n, size() - pos ) ); + } + + // compare(), 6x: + + nssv_constexpr14 int compare( basic_string_view other ) const nssv_noexcept // (1) + { +#if nssv_CPP17_OR_GREATER + if ( const int result = Traits::compare( data(), other.data(), (std::min)( size(), other.size() ) ) ) +#else + if ( const int result = detail::compare( data(), other.data(), (std::min)( size(), other.size() ) ) ) +#endif + { + return result; + } + + return size() == other.size() ? 0 : size() < other.size() ? 
-1 : 1; + } + + nssv_constexpr int compare( size_type pos1, size_type n1, basic_string_view other ) const // (2) + { + return substr( pos1, n1 ).compare( other ); + } + + nssv_constexpr int compare( size_type pos1, size_type n1, basic_string_view other, size_type pos2, size_type n2 ) const // (3) + { + return substr( pos1, n1 ).compare( other.substr( pos2, n2 ) ); + } + + nssv_constexpr int compare( CharT const * s ) const // (4) + { + return compare( basic_string_view( s ) ); + } + + nssv_constexpr int compare( size_type pos1, size_type n1, CharT const * s ) const // (5) + { + return substr( pos1, n1 ).compare( basic_string_view( s ) ); + } + + nssv_constexpr int compare( size_type pos1, size_type n1, CharT const * s, size_type n2 ) const // (6) + { + return substr( pos1, n1 ).compare( basic_string_view( s, n2 ) ); + } + + // 24.4.2.7 Searching: + + // starts_with(), 3x, since C++20: + + nssv_constexpr bool starts_with( basic_string_view v ) const nssv_noexcept // (1) + { + return size() >= v.size() && compare( 0, v.size(), v ) == 0; + } + + nssv_constexpr bool starts_with( CharT c ) const nssv_noexcept // (2) + { + return starts_with( basic_string_view( &c, 1 ) ); + } + + nssv_constexpr bool starts_with( CharT const * s ) const // (3) + { + return starts_with( basic_string_view( s ) ); + } + + // ends_with(), 3x, since C++20: + + nssv_constexpr bool ends_with( basic_string_view v ) const nssv_noexcept // (1) + { + return size() >= v.size() && compare( size() - v.size(), npos, v ) == 0; + } + + nssv_constexpr bool ends_with( CharT c ) const nssv_noexcept // (2) + { + return ends_with( basic_string_view( &c, 1 ) ); + } + + nssv_constexpr bool ends_with( CharT const * s ) const // (3) + { + return ends_with( basic_string_view( s ) ); + } + + // find(), 4x: + + nssv_constexpr14 size_type find( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) + { + return assert( v.size() == 0 || v.data() != nssv_nullptr ) + , pos >= size() + ? 
npos : to_pos( +#if nssv_CPP11_OR_GREATER && ! nssv_CPP17_OR_GREATER + detail::search( substr(pos), v ) +#else + std::search( cbegin() + pos, cend(), v.cbegin(), v.cend(), Traits::eq ) +#endif + ); + } + + nssv_constexpr size_type find( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) + { + return find( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find( CharT const * s, size_type pos, size_type n ) const // (3) + { + return find( basic_string_view( s, n ), pos ); + } + + nssv_constexpr size_type find( CharT const * s, size_type pos = 0 ) const // (4) + { + return find( basic_string_view( s ), pos ); + } + + // rfind(), 4x: + + nssv_constexpr14 size_type rfind( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) + { + if ( size() < v.size() ) + { + return npos; + } + + if ( v.empty() ) + { + return (std::min)( size(), pos ); + } + + const_iterator last = cbegin() + (std::min)( size() - v.size(), pos ) + v.size(); + const_iterator result = std::find_end( cbegin(), last, v.cbegin(), v.cend(), Traits::eq ); + + return result != last ? size_type( result - cbegin() ) : npos; + } + + nssv_constexpr14 size_type rfind( CharT c, size_type pos = npos ) const nssv_noexcept // (2) + { + return rfind( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr14 size_type rfind( CharT const * s, size_type pos, size_type n ) const // (3) + { + return rfind( basic_string_view( s, n ), pos ); + } + + nssv_constexpr14 size_type rfind( CharT const * s, size_type pos = npos ) const // (4) + { + return rfind( basic_string_view( s ), pos ); + } + + // find_first_of(), 4x: + + nssv_constexpr size_type find_first_of( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) + { + return pos >= size() + ? 
npos + : to_pos( std::find_first_of( cbegin() + pos, cend(), v.cbegin(), v.cend(), Traits::eq ) ); + } + + nssv_constexpr size_type find_first_of( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) + { + return find_first_of( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find_first_of( CharT const * s, size_type pos, size_type n ) const // (3) + { + return find_first_of( basic_string_view( s, n ), pos ); + } + + nssv_constexpr size_type find_first_of( CharT const * s, size_type pos = 0 ) const // (4) + { + return find_first_of( basic_string_view( s ), pos ); + } + + // find_last_of(), 4x: + + nssv_constexpr size_type find_last_of( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) + { + return empty() + ? npos + : pos >= size() + ? find_last_of( v, size() - 1 ) + : to_pos( std::find_first_of( const_reverse_iterator( cbegin() + pos + 1 ), crend(), v.cbegin(), v.cend(), Traits::eq ) ); + } + + nssv_constexpr size_type find_last_of( CharT c, size_type pos = npos ) const nssv_noexcept // (2) + { + return find_last_of( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find_last_of( CharT const * s, size_type pos, size_type count ) const // (3) + { + return find_last_of( basic_string_view( s, count ), pos ); + } + + nssv_constexpr size_type find_last_of( CharT const * s, size_type pos = npos ) const // (4) + { + return find_last_of( basic_string_view( s ), pos ); + } + + // find_first_not_of(), 4x: + + nssv_constexpr size_type find_first_not_of( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) + { + return pos >= size() + ? 
npos + : to_pos( std::find_if( cbegin() + pos, cend(), not_in_view( v ) ) ); + } + + nssv_constexpr size_type find_first_not_of( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) + { + return find_first_not_of( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find_first_not_of( CharT const * s, size_type pos, size_type count ) const // (3) + { + return find_first_not_of( basic_string_view( s, count ), pos ); + } + + nssv_constexpr size_type find_first_not_of( CharT const * s, size_type pos = 0 ) const // (4) + { + return find_first_not_of( basic_string_view( s ), pos ); + } + + // find_last_not_of(), 4x: + + nssv_constexpr size_type find_last_not_of( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) + { + return empty() + ? npos + : pos >= size() + ? find_last_not_of( v, size() - 1 ) + : to_pos( std::find_if( const_reverse_iterator( cbegin() + pos + 1 ), crend(), not_in_view( v ) ) ); + } + + nssv_constexpr size_type find_last_not_of( CharT c, size_type pos = npos ) const nssv_noexcept // (2) + { + return find_last_not_of( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find_last_not_of( CharT const * s, size_type pos, size_type count ) const // (3) + { + return find_last_not_of( basic_string_view( s, count ), pos ); + } + + nssv_constexpr size_type find_last_not_of( CharT const * s, size_type pos = npos ) const // (4) + { + return find_last_not_of( basic_string_view( s ), pos ); + } + + // Constants: + +#if nssv_CPP17_OR_GREATER + static nssv_constexpr size_type npos = size_type(-1); +#elif nssv_CPP11_OR_GREATER + enum : size_type { npos = size_type(-1) }; +#else + enum { npos = size_type(-1) }; +#endif + +private: + struct not_in_view + { + const basic_string_view v; + + nssv_constexpr explicit not_in_view( basic_string_view v_ ) : v( v_ ) {} + + nssv_constexpr bool operator()( CharT c ) const + { + return npos == v.find_first_of( c ); + } + }; + + nssv_constexpr size_type to_pos( const_iterator it 
) const + { + return it == cend() ? npos : size_type( it - cbegin() ); + } + + nssv_constexpr size_type to_pos( const_reverse_iterator it ) const + { + return it == crend() ? npos : size_type( crend() - it - 1 ); + } + + nssv_constexpr const_reference data_at( size_type pos ) const + { +#if nssv_BETWEEN( nssv_COMPILER_GNUC_VERSION, 1, 500 ) + return data_[pos]; +#else + return assert( pos < size() ), data_[pos]; +#endif + } + +private: + const_pointer data_; + size_type size_; + +public: +#if nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS + + template< class Allocator > + basic_string_view( std::basic_string const & s ) nssv_noexcept + : data_( s.data() ) + , size_( s.size() ) + {} + +#if nssv_HAVE_EXPLICIT_CONVERSION + + template< class Allocator > + explicit operator std::basic_string() const + { + return to_string( Allocator() ); + } + +#endif // nssv_HAVE_EXPLICIT_CONVERSION + +#if nssv_CPP11_OR_GREATER + + template< class Allocator = std::allocator > + std::basic_string + to_string( Allocator const & a = Allocator() ) const + { + return std::basic_string( begin(), end(), a ); + } + +#else + + std::basic_string + to_string() const + { + return std::basic_string( begin(), end() ); + } + + template< class Allocator > + std::basic_string + to_string( Allocator const & a ) const + { + return std::basic_string( begin(), end(), a ); + } + +#endif // nssv_CPP11_OR_GREATER + +#endif // nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS +}; + +// +// Non-member functions: +// + +// 24.4.3 Non-member comparison functions: +// lexicographically compare two string views (function template): + +template< class CharT, class Traits > +nssv_constexpr bool operator== ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +template< class CharT, class Traits > +nssv_constexpr bool operator!= ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< 
class CharT, class Traits > +nssv_constexpr bool operator< ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +template< class CharT, class Traits > +nssv_constexpr bool operator<= ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +template< class CharT, class Traits > +nssv_constexpr bool operator> ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0; } + +template< class CharT, class Traits > +nssv_constexpr bool operator>= ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +// Let S be basic_string_view, and sv be an instance of S. +// Implementations shall provide sufficient additional overloads marked +// constexpr and noexcept so that an object t with an implicit conversion +// to S can be compared according to Table 67. + +#if ! nssv_CPP11_OR_GREATER || nssv_BETWEEN( nssv_COMPILER_MSVC_VERSION, 100, 141 ) + +// accommodate for older compilers: + +// == + +template< class CharT, class Traits> +nssv_constexpr bool operator==( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.size() == detail::length( rhs ) && lhs.compare( rhs ) == 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator==( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return detail::length( lhs ) == rhs.size() && rhs.compare( lhs ) == 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator==( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator==( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +// != + +template< class CharT, class Traits> +nssv_constexpr bool operator!=( + 
basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< class CharT, class Traits> +nssv_constexpr bool operator!=( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< class CharT, class Traits> +nssv_constexpr bool operator!=( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< class CharT, class Traits> +nssv_constexpr bool operator!=( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +// < + +template< class CharT, class Traits> +nssv_constexpr bool operator<( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return rhs.compare( lhs ) > 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return rhs.compare( lhs ) > 0; } + +// <= + +template< class CharT, class Traits> +nssv_constexpr bool operator<=( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<=( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return rhs.compare( lhs ) >= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<=( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<=( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return rhs.compare( lhs ) >= 0; } + +// > + +template< 
class CharT, class Traits> +nssv_constexpr bool operator>( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return rhs.compare( lhs ) < 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return rhs.compare( lhs ) < 0; } + +// >= + +template< class CharT, class Traits> +nssv_constexpr bool operator>=( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>=( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return rhs.compare( lhs ) <= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>=( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>=( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return rhs.compare( lhs ) <= 0; } + +#else // newer compilers: + +#define nssv_BASIC_STRING_VIEW_I(T,U) typename std::decay< basic_string_view >::type + +#if defined(_MSC_VER) // issue 40 +# define nssv_MSVC_ORDER(x) , int=x +#else +# define nssv_MSVC_ORDER(x) /*, int=x*/ +#endif + +// == + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator==( + basic_string_view lhs, + nssv_BASIC_STRING_VIEW_I(CharT, Traits) rhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator==( + nssv_BASIC_STRING_VIEW_I(CharT, 
Traits) lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +// != + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator!= ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator!= ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +// < + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator< ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator< ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +// <= + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator<= ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator<= ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +// > + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator> ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator> ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ 
return lhs.compare( rhs ) > 0; } + +// >= + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator>= ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator>= ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +#undef nssv_MSVC_ORDER +#undef nssv_BASIC_STRING_VIEW_I + +#endif // compiler-dependent approach to comparisons + +// 24.4.4 Inserters and extractors: + +#if ! nssv_CONFIG_NO_STREAM_INSERTION + +namespace detail { + +template< class Stream > +void write_padding( Stream & os, std::streamsize n ) +{ + for ( std::streamsize i = 0; i < n; ++i ) + os.rdbuf()->sputc( os.fill() ); +} + +template< class Stream, class View > +Stream & write_to_stream( Stream & os, View const & sv ) +{ + typename Stream::sentry sentry( os ); + + if ( !sentry ) + return os; + + const std::streamsize length = static_cast( sv.length() ); + + // Whether, and how, to pad: + const bool pad = ( length < os.width() ); + const bool left_pad = pad && ( os.flags() & std::ios_base::adjustfield ) == std::ios_base::right; + + if ( left_pad ) + write_padding( os, os.width() - length ); + + // Write span characters: + os.rdbuf()->sputn( sv.begin(), length ); + + if ( pad && !left_pad ) + write_padding( os, os.width() - length ); + + // Reset output stream width: + os.width( 0 ); + + return os; +} + +} // namespace detail + +template< class CharT, class Traits > +std::basic_ostream & +operator<<( + std::basic_ostream& os, + basic_string_view sv ) +{ + return detail::write_to_stream( os, sv ); +} + +#endif // nssv_CONFIG_NO_STREAM_INSERTION + +// Several typedefs for common character types are provided: + +typedef basic_string_view string_view; +typedef basic_string_view wstring_view; +#if 
nssv_HAVE_WCHAR16_T +typedef basic_string_view u16string_view; +typedef basic_string_view u32string_view; +#endif + +}} // namespace nonstd::sv_lite + +// +// 24.4.6 Suffix for basic_string_view literals: +// + +#if nssv_HAVE_USER_DEFINED_LITERALS + +namespace nonstd { +nssv_inline_ns namespace literals { +nssv_inline_ns namespace string_view_literals { + +#if nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS + +nssv_constexpr nonstd::sv_lite::string_view operator "" sv( const char* str, size_t len ) nssv_noexcept // (1) +{ + return nonstd::sv_lite::string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::u16string_view operator "" sv( const char16_t* str, size_t len ) nssv_noexcept // (2) +{ + return nonstd::sv_lite::u16string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::u32string_view operator "" sv( const char32_t* str, size_t len ) nssv_noexcept // (3) +{ + return nonstd::sv_lite::u32string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) +{ + return nonstd::sv_lite::wstring_view{ str, len }; +} + +#endif // nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS + +#if nssv_CONFIG_USR_SV_OPERATOR + +nssv_constexpr nonstd::sv_lite::string_view operator "" _sv( const char* str, size_t len ) nssv_noexcept // (1) +{ + return nonstd::sv_lite::string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::u16string_view operator "" _sv( const char16_t* str, size_t len ) nssv_noexcept // (2) +{ + return nonstd::sv_lite::u16string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::u32string_view operator "" _sv( const char32_t* str, size_t len ) nssv_noexcept // (3) +{ + return nonstd::sv_lite::u32string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::wstring_view operator "" _sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) +{ + return nonstd::sv_lite::wstring_view{ str, len }; +} + +#endif // nssv_CONFIG_USR_SV_OPERATOR + 
+}}} // namespace nonstd::literals::string_view_literals + +#endif + +// +// Extensions for std::string: +// + +#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS + +namespace nonstd { +namespace sv_lite { + +// Exclude MSVC 14 (19.00): it yields ambiguous to_string(): + +#if nssv_CPP11_OR_GREATER && nssv_COMPILER_MSVC_VERSION != 140 + +template< class CharT, class Traits, class Allocator = std::allocator > +std::basic_string +to_string( basic_string_view v, Allocator const & a = Allocator() ) +{ + return std::basic_string( v.begin(), v.end(), a ); +} + +#else + +template< class CharT, class Traits > +std::basic_string +to_string( basic_string_view v ) +{ + return std::basic_string( v.begin(), v.end() ); +} + +template< class CharT, class Traits, class Allocator > +std::basic_string +to_string( basic_string_view v, Allocator const & a ) +{ + return std::basic_string( v.begin(), v.end(), a ); +} + +#endif // nssv_CPP11_OR_GREATER + +template< class CharT, class Traits, class Allocator > +basic_string_view +to_string_view( std::basic_string const & s ) +{ + return basic_string_view( s.data(), s.size() ); +} + +}} // namespace nonstd::sv_lite + +#endif // nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS + +// +// make types and algorithms available in namespace nonstd: +// + +namespace nonstd { + +using sv_lite::basic_string_view; +using sv_lite::string_view; +using sv_lite::wstring_view; + +#if nssv_HAVE_WCHAR16_T +using sv_lite::u16string_view; +#endif +#if nssv_HAVE_WCHAR32_T +using sv_lite::u32string_view; +#endif + +// literal "sv" + +using sv_lite::operator==; +using sv_lite::operator!=; +using sv_lite::operator<; +using sv_lite::operator<=; +using sv_lite::operator>; +using sv_lite::operator>=; + +#if ! 
nssv_CONFIG_NO_STREAM_INSERTION +using sv_lite::operator<<; +#endif + +#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS +using sv_lite::to_string; +using sv_lite::to_string_view; +#endif + +} // namespace nonstd + +// 24.4.5 Hash support (C++11): + +// Note: The hash value of a string view object is equal to the hash value of +// the corresponding string object. + +#if nssv_HAVE_STD_HASH + +#include + +namespace std { + +template<> +struct hash< nonstd::string_view > +{ +public: + std::size_t operator()( nonstd::string_view v ) const nssv_noexcept + { + return std::hash()( std::string( v.data(), v.size() ) ); + } +}; + +template<> +struct hash< nonstd::wstring_view > +{ +public: + std::size_t operator()( nonstd::wstring_view v ) const nssv_noexcept + { + return std::hash()( std::wstring( v.data(), v.size() ) ); + } +}; + +template<> +struct hash< nonstd::u16string_view > +{ +public: + std::size_t operator()( nonstd::u16string_view v ) const nssv_noexcept + { + return std::hash()( std::u16string( v.data(), v.size() ) ); + } +}; + +template<> +struct hash< nonstd::u32string_view > +{ +public: + std::size_t operator()( nonstd::u32string_view v ) const nssv_noexcept + { + return std::hash()( std::u32string( v.data(), v.size() ) ); + } +}; + +} // namespace std + +#endif // nssv_HAVE_STD_HASH + +nssv_RESTORE_WARNINGS() + +#endif // nssv_HAVE_STD_STRING_VIEW +#endif // NONSTD_SV_LITE_H_INCLUDED +/* end file simdjson/nonstd/string_view.hpp */ +SIMDJSON_POP_DISABLE_WARNINGS + +namespace std { + using string_view = nonstd::string_view; +} +#endif // SIMDJSON_HAS_STRING_VIEW +#undef SIMDJSON_HAS_STRING_VIEW // We are not going to need this macro anymore. + +/// If EXPR is an error, returns it. +#define SIMDJSON_TRY(EXPR) { auto _err = (EXPR); if (_err) { return _err; } } + +// Unless the programmer has already set SIMDJSON_DEVELOPMENT_CHECKS, +// we want to set it under debug builds. We detect a debug build +// under Visual Studio when the _DEBUG macro is set. 
Under the other +// compilers, we use the fact that they define __OPTIMIZE__ whenever +// they allow optimizations. +// It is possible that this could miss some cases where SIMDJSON_DEVELOPMENT_CHECKS +// is helpful, but the programmer can set the macro SIMDJSON_DEVELOPMENT_CHECKS. +// It could also wrongly set SIMDJSON_DEVELOPMENT_CHECKS (e.g., if the programmer +// sets _DEBUG in a release build under Visual Studio, or if some compiler fails to +// set the __OPTIMIZE__ macro). +#ifndef SIMDJSON_DEVELOPMENT_CHECKS +#ifdef _MSC_VER +// Visual Studio seems to set _DEBUG for debug builds. +#ifdef _DEBUG +#define SIMDJSON_DEVELOPMENT_CHECKS 1 +#endif // _DEBUG +#else // _MSC_VER +// All other compilers appear to set __OPTIMIZE__ to a positive integer +// when the compiler is optimizing. +#ifndef __OPTIMIZE__ +#define SIMDJSON_DEVELOPMENT_CHECKS 1 +#endif // __OPTIMIZE__ +#endif // _MSC_VER +#endif // SIMDJSON_DEVELOPMENT_CHECKS + +// The SIMDJSON_CHECK_EOF macro is a feature flag for the "don't require padding" +// feature. 
+ +#if SIMDJSON_CPLUSPLUS17 +// if we have C++, then fallthrough is a default attribute +# define simdjson_fallthrough [[fallthrough]] +// check if we have __attribute__ support +#elif defined(__has_attribute) +// check if we have the __fallthrough__ attribute +#if __has_attribute(__fallthrough__) +// we are good to go: +# define simdjson_fallthrough __attribute__((__fallthrough__)) +#endif // __has_attribute(__fallthrough__) +#endif // SIMDJSON_CPLUSPLUS17 +// on some systems, we simply do not have support for fallthrough, so use a default: +#ifndef simdjson_fallthrough +# define simdjson_fallthrough do {} while (0) /* fallthrough */ +#endif // simdjson_fallthrough + +#if SIMDJSON_DEVELOPMENT_CHECKS +#define SIMDJSON_DEVELOPMENT_ASSERT(expr) do { assert ((expr)); } while (0) +#else +#define SIMDJSON_DEVELOPMENT_ASSERT(expr) do { } while (0) +#endif + +#ifndef SIMDJSON_UTF8VALIDATION +#define SIMDJSON_UTF8VALIDATION 1 +#endif + +#ifdef __has_include +// How do we detect that a compiler supports vbmi2? +// For sure if the following header is found, we are ok? +#if __has_include() +#define SIMDJSON_COMPILER_SUPPORTS_VBMI2 1 +#endif +#endif + +#ifdef _MSC_VER +#if _MSC_VER >= 1920 +// Visual Studio 2019 and up support VBMI2 under x64 even if the header +// avx512vbmi2intrin.h is not found. +#define SIMDJSON_COMPILER_SUPPORTS_VBMI2 1 +#endif +#endif + +// By default, we allow AVX512. +#ifndef SIMDJSON_AVX512_ALLOWED +#define SIMDJSON_AVX512_ALLOWED 1 +#endif + +#endif // SIMDJSON_COMMON_DEFS_H +/* end file simdjson/common_defs.h */ + +// This provides the public API for simdjson. 
+// DOM and ondemand are amalgamated separately, in simdjson.h +/* including simdjson/simdjson_version.h: #include "simdjson/simdjson_version.h" */ +/* begin file simdjson/simdjson_version.h */ +// /include/simdjson/simdjson_version.h automatically generated by release.py, +// do not change by hand +#ifndef SIMDJSON_SIMDJSON_VERSION_H +#define SIMDJSON_SIMDJSON_VERSION_H + +/** The version of simdjson being used (major.minor.revision) */ +#define SIMDJSON_VERSION "3.9.4" + +namespace simdjson { +enum { + /** + * The major version (MAJOR.minor.revision) of simdjson being used. + */ + SIMDJSON_VERSION_MAJOR = 3, + /** + * The minor version (major.MINOR.revision) of simdjson being used. + */ + SIMDJSON_VERSION_MINOR = 9, + /** + * The revision (major.minor.REVISION) of simdjson being used. + */ + SIMDJSON_VERSION_REVISION = 4 +}; +} // namespace simdjson + +#endif // SIMDJSON_SIMDJSON_VERSION_H +/* end file simdjson/simdjson_version.h */ + +/* including simdjson/base.h: #include "simdjson/base.h" */ +/* begin file simdjson/base.h */ +/** + * @file Base declarations for all simdjson headers + * @private + */ +#ifndef SIMDJSON_BASE_H +#define SIMDJSON_BASE_H + +/* skipped duplicate #include "simdjson/common_defs.h" */ +/* skipped duplicate #include "simdjson/compiler_check.h" */ +/* including simdjson/error.h: #include "simdjson/error.h" */ +/* begin file simdjson/error.h */ +#ifndef SIMDJSON_ERROR_H +#define SIMDJSON_ERROR_H + +/* skipped duplicate #include "simdjson/base.h" */ + +#include +#include + +namespace simdjson { + +/** + * All possible errors returned by simdjson. These error codes are subject to change + * and not all simdjson kernel returns the same error code given the same input: it is not + * well defined which error a given input should produce. + * + * Only SUCCESS evaluates to false as a Boolean. All other error codes will evaluate + * to true as a Boolean. 
+ */ +enum error_code { + SUCCESS = 0, ///< No error + CAPACITY, ///< This parser can't support a document that big + MEMALLOC, ///< Error allocating memory, most likely out of memory + TAPE_ERROR, ///< Something went wrong, this is a generic error + DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation + STRING_ERROR, ///< Problem while parsing a string + T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 't' + F_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'f' + N_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'n' + NUMBER_ERROR, ///< Problem while parsing a number + BIGINT_ERROR, ///< The integer value exceeds 64 bits + UTF8_ERROR, ///< the input is not valid UTF-8 + UNINITIALIZED, ///< unknown error, or uninitialized document + EMPTY, ///< no structural element found + UNESCAPED_CHARS, ///< found unescaped characters in a string. + UNCLOSED_STRING, ///< missing quote at the end + UNSUPPORTED_ARCHITECTURE, ///< unsupported architecture + INCORRECT_TYPE, ///< JSON element has a different type than user expected + NUMBER_OUT_OF_RANGE, ///< JSON number does not fit in 64 bits + INDEX_OUT_OF_BOUNDS, ///< JSON array index too large + NO_SUCH_FIELD, ///< JSON field not found in object + IO_ERROR, ///< Error reading a file + INVALID_JSON_POINTER, ///< Invalid JSON pointer syntax + INVALID_URI_FRAGMENT, ///< Invalid URI fragment + UNEXPECTED_ERROR, ///< indicative of a bug in simdjson + PARSER_IN_USE, ///< parser is already in use. + OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order (checked when SIMDJSON_DEVELOPMENT_CHECKS=1) + INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for simdjson to safely parse it. + INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. + SCALAR_DOCUMENT_AS_VALUE, ///< A scalar document is treated as a value. + OUT_OF_BOUNDS, ///< Attempted to access location outside of document. 
+ TRAILING_CONTENT, ///< Unexpected trailing content in the JSON input + NUM_ERROR_CODES +}; + +/** + * It is the convention throughout the code that the macro SIMDJSON_DEVELOPMENT_CHECKS determines whether + * we check for OUT_OF_ORDER_ITERATION. The logic behind it is that these errors only occurs when the code + * that was written while breaking some simdjson::ondemand requirement. They should not occur in released + * code after these issues were fixed. + */ + +/** + * Get the error message for the given error code. + * + * dom::parser parser; + * dom::element doc; + * auto error = parser.parse("foo",3).get(doc); + * if (error) { printf("Error: %s\n", error_message(error)); } + * + * @return The error message. + */ +inline const char *error_message(error_code error) noexcept; + +/** + * Write the error message to the output stream + */ +inline std::ostream& operator<<(std::ostream& out, error_code error) noexcept; + +/** + * Exception thrown when an exception-supporting simdjson method is called + */ +struct simdjson_error : public std::exception { + /** + * Create an exception from a simdjson error code. + * @param error The error code + */ + simdjson_error(error_code error) noexcept : _error{error} { } + /** The error message */ + const char *what() const noexcept { return error_message(error()); } + /** The error code */ + error_code error() const noexcept { return _error; } +private: + /** The error code that was used */ + error_code _error; +}; + +namespace internal { + +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. 
+ * + * Override like: + * + * struct simdjson_result : public internal::simdjson_result_base { + * simdjson_result() noexcept : internal::simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct simdjson_result_base : protected std::pair { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline simdjson_result_base() noexcept; + + /** + * Create a new error result. + */ + simdjson_inline simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. 
+ */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only if + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + + /** + * Take the result value (move it). This function is safe if and only if + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; + +}; // struct simdjson_result_base + +} // namespace internal + +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + */ +template +struct simdjson_result : public internal::simdjson_result_base { + /** + * @private Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline simdjson_result() noexcept; + /** + * @private Create a new successful result. + */ + simdjson_inline simdjson_result(T &&value) noexcept; + /** + * @private Create a new error result. + */ + simdjson_inline simdjson_result(error_code error_code) noexcept; + /** + * @private Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline simdjson_result(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error.
+ * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_warn_unused simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only if + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + + /** + * Take the result value (move it). This function is safe if and only if + * the error() method returns a value that evaluates to false.
+ */ + simdjson_inline T&& value_unsafe() && noexcept; + +}; // struct simdjson_result + +#if SIMDJSON_EXCEPTIONS + +template +inline std::ostream& operator<<(std::ostream& out, simdjson_result value) { return out << value.value(); } +#endif // SIMDJSON_EXCEPTIONS + +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +/** + * @deprecated This is an alias and will be removed, use error_code instead + */ +using ErrorValues [[deprecated("This is an alias and will be removed, use error_code instead")]] = error_code; + +/** + * @deprecated Error codes should be stored and returned as `error_code`, use `error_message()` instead. + */ +[[deprecated("Error codes should be stored and returned as `error_code`, use `error_message()` instead.")]] +inline const std::string error_message(int error) noexcept; +#endif // SIMDJSON_DISABLE_DEPRECATED_API +} // namespace simdjson + +#endif // SIMDJSON_ERROR_H +/* end file simdjson/error.h */ +/* skipped duplicate #include "simdjson/portability.h" */ + +/** + * @brief The top level simdjson namespace, containing everything the library provides. + */ +namespace simdjson { + +SIMDJSON_PUSH_DISABLE_UNUSED_WARNINGS + +/** The maximum document size supported by simdjson. */ +constexpr size_t SIMDJSON_MAXSIZE_BYTES = 0xFFFFFFFF; + +/** + * The amount of padding needed in a buffer to parse JSON. + * + * The input buf should be readable up to buf + SIMDJSON_PADDING + * this is a stopgap; there should be a better description of the + * main loop and its behavior that abstracts over this + * See https://github.com/simdjson/simdjson/issues/174 + */ +constexpr size_t SIMDJSON_PADDING = 64; + +/** + * By default, simdjson supports this many nested objects and arrays. + * + * This is the default for parser::max_depth(). 
+ */ +constexpr size_t DEFAULT_MAX_DEPTH = 1024; + +SIMDJSON_POP_DISABLE_UNUSED_WARNINGS + +class implementation; +struct padded_string; +class padded_string_view; +enum class stage1_mode; + +namespace internal { + +template +class atomic_ptr; +class dom_parser_implementation; +class escape_json_string; +class tape_ref; +struct value128; +enum class tape_type; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_BASE_H +/* end file simdjson/base.h */ + +/* skipped duplicate #include "simdjson/error.h" */ +/* including simdjson/error-inl.h: #include "simdjson/error-inl.h" */ +/* begin file simdjson/error-inl.h */ +#ifndef SIMDJSON_ERROR_INL_H +#define SIMDJSON_ERROR_INL_H + +/* skipped duplicate #include "simdjson/error.h" */ + +#include + +namespace simdjson { +namespace internal { + // We store the error code so we can validate the error message is associated with the right code + struct error_code_info { + error_code code; + const char* message; // do not use a fancy std::string where a simple C string will do (no alloc, no destructor) + }; + // These MUST match the codes in error_code. We check this constraint in basictests. + extern SIMDJSON_DLLIMPORTEXPORT const error_code_info error_codes[]; +} // namespace internal + + +inline const char *error_message(error_code error) noexcept { + // If you're using error_code, we're trusting you got it from the enum. 
+ return internal::error_codes[int(error)].message; +} + +// deprecated function +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +inline const std::string error_message(int error) noexcept { + if (error < 0 || error >= error_code::NUM_ERROR_CODES) { + return internal::error_codes[UNEXPECTED_ERROR].message; + } + return internal::error_codes[error].message; +} +#endif // SIMDJSON_DISABLE_DEPRECATED_API + +inline std::ostream& operator<<(std::ostream& out, error_code error) noexcept { + return out << error_message(error); +} + +namespace internal { + +// +// internal::simdjson_result_base inline implementation +// + +template +simdjson_inline void simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T&& simdjson_result_base::value_unsafe() && noexcept { + return 
std::forward(this->first); +} + +template +simdjson_inline simdjson_result_base::simdjson_result_base(T &&value, error_code error) noexcept + : std::pair(std::forward(value), error) {} +template +simdjson_inline simdjson_result_base::simdjson_result_base(error_code error) noexcept + : simdjson_result_base(T{}, error) {} +template +simdjson_inline simdjson_result_base::simdjson_result_base(T &&value) noexcept + : simdjson_result_base(std::forward(value), SUCCESS) {} +template +simdjson_inline simdjson_result_base::simdjson_result_base() noexcept + : simdjson_result_base(T{}, UNINITIALIZED) {} + +} // namespace internal + +/// +/// simdjson_result inline implementation +/// + +template +simdjson_inline void simdjson_result::tie(T &value, error_code &error) && noexcept { + std::forward>(*this).tie(value, error); +} + +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &value) && noexcept { + return std::forward>(*this).get(value); +} + +template +simdjson_inline error_code simdjson_result::error() const noexcept { + return internal::simdjson_result_base::error(); +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& simdjson_result::value() & noexcept(false) { + return internal::simdjson_result_base::value(); +} + +template +simdjson_inline T&& simdjson_result::value() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T&& simdjson_result::take_value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline simdjson_result::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& simdjson_result::value_unsafe() const& noexcept { + return internal::simdjson_result_base::value_unsafe(); +} + +template +simdjson_inline T&& simdjson_result::value_unsafe() && noexcept { + return std::forward>(*this).value_unsafe(); +} + +template +simdjson_inline 
simdjson_result::simdjson_result(T &&value, error_code error) noexcept + : internal::simdjson_result_base(std::forward(value), error) {} +template +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : internal::simdjson_result_base(error) {} +template +simdjson_inline simdjson_result::simdjson_result(T &&value) noexcept + : internal::simdjson_result_base(std::forward(value)) {} +template +simdjson_inline simdjson_result::simdjson_result() noexcept + : internal::simdjson_result_base() {} + +} // namespace simdjson + +#endif // SIMDJSON_ERROR_INL_H +/* end file simdjson/error-inl.h */ +/* including simdjson/implementation.h: #include "simdjson/implementation.h" */ +/* begin file simdjson/implementation.h */ +#ifndef SIMDJSON_IMPLEMENTATION_H +#define SIMDJSON_IMPLEMENTATION_H + +/* including simdjson/internal/atomic_ptr.h: #include "simdjson/internal/atomic_ptr.h" */ +/* begin file simdjson/internal/atomic_ptr.h */ +#ifndef SIMDJSON_INTERNAL_ATOMIC_PTR_H +#define SIMDJSON_INTERNAL_ATOMIC_PTR_H + +/* skipped duplicate #include "simdjson/base.h" */ +#include + +namespace simdjson { +namespace internal { + +template +class atomic_ptr { +public: + atomic_ptr(T *_ptr) : ptr{_ptr} {} + + operator const T*() const { return ptr.load(); } + const T& operator*() const { return *ptr; } + const T* operator->() const { return ptr.load(); } + + operator T*() { return ptr.load(); } + T& operator*() { return *ptr; } + T* operator->() { return ptr.load(); } + atomic_ptr& operator=(T *_ptr) { ptr = _ptr; return *this; } + +private: + std::atomic ptr; +}; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_ATOMIC_PTR_H +/* end file simdjson/internal/atomic_ptr.h */ +/* including simdjson/internal/dom_parser_implementation.h: #include "simdjson/internal/dom_parser_implementation.h" */ +/* begin file simdjson/internal/dom_parser_implementation.h */ +#ifndef SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H +#define 
SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H + +/* skipped duplicate #include "simdjson/base.h" */ +/* skipped duplicate #include "simdjson/error.h" */ +#include + +namespace simdjson { + +namespace dom { +class document; +} // namespace dom + +/** +* This enum is used with the dom_parser_implementation::stage1 function. +* 1) The regular mode expects a fully formed JSON document. +* 2) The streaming_partial mode expects a possibly truncated +* input within a stream of JSON documents. +* 3) The streaming_final mode allows us to truncate final +* unterminated strings. It is useful in conjunction with streaming_partial. +*/ +enum class stage1_mode { regular, streaming_partial, streaming_final}; + +/** + * Returns true if mode == streaming_partial or mode == streaming_final + */ +inline bool is_streaming(stage1_mode mode) { + // performance note: it is probably faster to check that mode is different + // from regular than checking that it is either streaming_partial or streaming_final. + return (mode != stage1_mode::regular); + // return (mode == stage1_mode::streaming_partial || mode == stage1_mode::streaming_final); +} + + +namespace internal { + + +/** + * An implementation of simdjson's DOM parser for a particular CPU architecture. + * + * This class is expected to be accessed only by pointer, and never move in memory (though the + * pointer can move). + */ +class dom_parser_implementation { +public: + + /** + * @private For internal implementation use + * + * Run a full JSON parse on a single document (stage1 + stage2). + * + * Guaranteed only to be called when capacity > document length. + * + * Overridden by each implementation. + * + * @param buf The json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. + * @param len The length of the json document. + * @return The error code, or SUCCESS if there was no error.
+ */ + simdjson_warn_unused virtual error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept = 0; + + /** + * @private For internal implementation use + * + * Stage 1 of the document parser. + * + * Guaranteed only to be called when capacity > document length. + * + * Overridden by each implementation. + * + * @param buf The json document to parse. + * @param len The length of the json document. + * @param streaming The stage1_mode to run in; a streaming mode is used when this is being called by parser::parse_many. + * @return The error code, or SUCCESS if there was no error. + */ + simdjson_warn_unused virtual error_code stage1(const uint8_t *buf, size_t len, stage1_mode streaming) noexcept = 0; + + /** + * @private For internal implementation use + * + * Stage 2 of the document parser. + * + * Called after stage1(). + * + * Overridden by each implementation. + * + * @param doc The document to output to. + * @return The error code, or SUCCESS if there was no error. + */ + simdjson_warn_unused virtual error_code stage2(dom::document &doc) noexcept = 0; + + /** + * @private For internal implementation use + * + * Stage 2 of the document parser for parser::parse_many. + * + * Guaranteed only to be called after stage1(). + * Overridden by each implementation. + * + * @param doc The document to output to. + * @return The error code, SUCCESS if there was no error, or EMPTY if all documents have been parsed. + */ + simdjson_warn_unused virtual error_code stage2_next(dom::document &doc) noexcept = 0; + + /** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as a pointer. In case of error (e.g., the string has bad escaped codes), + * then nullptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + * + * Overridden by each implementation.
+ * + * @param src pointer to the beginning of a valid UTF-8 JSON string, must end with an unescaped quote. + * @param dst pointer to a destination buffer, it must point to a region in memory of sufficient size. + * @param allow_replacement whether we allow a replacement character when the UTF-8 contains unmatched surrogate pairs. + * @return end of the written region (exclusive) or nullptr in case of error. + */ + simdjson_warn_unused virtual uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept = 0; + + /** + * Unescape a NON-valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as a pointer. In case of error (e.g., the string has bad escaped codes), + * then nullptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + * + * Overridden by each implementation. + * + * @param src pointer to the beginning of a possibly invalid UTF-8 JSON string, must end with an unescaped quote. + * @param dst pointer to a destination buffer, it must point to a region in memory of sufficient size. + * @return end of the written region (exclusive) or nullptr in case of error. + */ + simdjson_warn_unused virtual uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept = 0; + + /** + * Change the capacity of this parser. + * + * The capacity can never exceed SIMDJSON_MAXSIZE_BYTES (e.g., 4 GB) + * and a CAPACITY error is returned if it is attempted. + * + * Generally used for reallocation. + * + * @param capacity The new capacity. + * @return The error code, or SUCCESS if there was no error. + */ + virtual error_code set_capacity(size_t capacity) noexcept = 0; + + /** + * Change the max depth of this parser.
+ * + * Generally used for reallocation. + * + * @param max_depth The new max_depth. + * @return The error code, or SUCCESS if there was no error. + */ + virtual error_code set_max_depth(size_t max_depth) noexcept = 0; + + /** + * Deallocate this parser. + */ + virtual ~dom_parser_implementation() = default; + + /** Number of structural indices passed from stage 1 to stage 2 */ + uint32_t n_structural_indexes{0}; + /** Structural indices passed from stage 1 to stage 2 */ + std::unique_ptr structural_indexes{}; + /** Next structural index to parse */ + uint32_t next_structural_index{0}; + + /** + * The largest document this parser can support without reallocating. + * + * @return Current capacity, in bytes. + */ + simdjson_inline size_t capacity() const noexcept; + + /** + * The maximum level of nested objects and arrays supported by this parser. + * + * @return Maximum depth, as a number of nested objects and arrays. + */ + simdjson_inline size_t max_depth() const noexcept; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. + * @return The error, if there is one. + */ + simdjson_warn_unused inline error_code allocate(size_t capacity, size_t max_depth) noexcept; + + +protected: + /** + * The maximum document length this parser supports. + * + * Buffers are large enough to handle any document up to this length. + */ + size_t _capacity{0}; + + /** + * The maximum depth (number of nested objects and arrays) supported by this parser. + * + * Defaults to DEFAULT_MAX_DEPTH. + */ + size_t _max_depth{0}; + + // Declaring these so that subclasses can use them to implement their constructors.
+ simdjson_inline dom_parser_implementation() noexcept; + simdjson_inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + simdjson_inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + + simdjson_inline dom_parser_implementation(const dom_parser_implementation &) noexcept = delete; + simdjson_inline dom_parser_implementation &operator=(const dom_parser_implementation &other) noexcept = delete; +}; // class dom_parser_implementation + +simdjson_inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +simdjson_inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +simdjson_inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +simdjson_inline size_t dom_parser_implementation::capacity() const noexcept { + return _capacity; +} + +simdjson_inline size_t dom_parser_implementation::max_depth() const noexcept { + return _max_depth; +} + +simdjson_warn_unused +inline error_code dom_parser_implementation::allocate(size_t capacity, size_t max_depth) noexcept { + if (this->max_depth() != max_depth) { + error_code err = set_max_depth(max_depth); + if (err) { return err; } + } + if (_capacity != capacity) { + error_code err = set_capacity(capacity); + if (err) { return err; } + } + return SUCCESS; +} + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/internal/dom_parser_implementation.h */ + +#include + +namespace simdjson { + +/** + * Validate the UTF-8 string. + * + * @param buf the string to validate. + * @param len the length of the string in bytes. + * @return true if the string is valid UTF-8. + */ +simdjson_warn_unused bool validate_utf8(const char * buf, size_t len) noexcept; +/** + * Validate the UTF-8 string. + * + * @param sv the string_view to validate. 
+ * @return true if the string is valid UTF-8. + */ +simdjson_inline simdjson_warn_unused bool validate_utf8(const std::string_view sv) noexcept { + return validate_utf8(sv.data(), sv.size()); +} + +/** + * Validate the UTF-8 string. + * + * @param s the string to validate. + * @return true if the string is valid UTF-8. + */ +simdjson_inline simdjson_warn_unused bool validate_utf8(const std::string& s) noexcept { + return validate_utf8(s.data(), s.size()); +} + +/** + * An implementation of simdjson for a particular CPU architecture. + * + * Also used to maintain the currently active implementation. The active implementation is + * automatically initialized on first use to the most advanced implementation supported by the host. + */ +class implementation { +public: + + /** + * The name of this implementation. + * + * const implementation *impl = simdjson::get_active_implementation(); + * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; + * + * @return the name of the implementation, e.g. "haswell", "westmere", "arm64". + */ + virtual std::string name() const { return std::string(_name); } + + /** + * The description of this implementation. + * + * const implementation *impl = simdjson::get_active_implementation(); + * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; + * + * @return the description of the implementation, e.g. "Intel/AMD AVX2", "Intel/AMD SSE4.2", "ARM NEON". + */ + virtual std::string description() const { return std::string(_description); } + + /** + * The instruction sets this implementation is compiled against + * and the current CPU match. This function may poll the current CPU/system + * and should therefore not be called too often if performance is a concern. + * + * @return true if the implementation can be safely used on the current system (determined at runtime).
+ */ + bool supported_by_runtime_system() const; + + /** + * @private For internal implementation use + * + * The instruction sets this implementation is compiled against. + * + * @return a mask of all required `internal::instruction_set::` values. + */ + virtual uint32_t required_instruction_sets() const { return _required_instruction_sets; } + + /** + * @private For internal implementation use + * + * const implementation *impl = simdjson::get_active_implementation(); + * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; + * + * @param capacity The largest document that will be passed to the parser. + * @param max_depth The maximum JSON object/array nesting this parser is expected to handle. + * @param dst The place to put the resulting parser implementation. + * @return the error code, or SUCCESS if there was no error. + */ + virtual error_code create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr &dst + ) const noexcept = 0; + + /** + * @private For internal implementation use + * + * Minify the input string assuming that it represents a JSON string, does not parse or validate. + * + * Overridden by each implementation. + * + * @param buf the json document to minify. + * @param len the length of the json document. + * @param dst the buffer to write the minified document to. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. + * @param dst_len the number of bytes written. Output only. + * @return the error code, or SUCCESS if there was no error. + */ + simdjson_warn_unused virtual error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept = 0; + + + /** + * Validate the UTF-8 string. + * + * Overridden by each implementation. + * + * @param buf the string to validate. + * @param len the length of the string in bytes. + * @return true if and only if the string is valid UTF-8. 
+ */ + simdjson_warn_unused virtual bool validate_utf8(const char *buf, size_t len) const noexcept = 0; + +protected: + /** @private Construct an implementation with the given name and description. For subclasses. */ + simdjson_inline implementation( + std::string_view name, + std::string_view description, + uint32_t required_instruction_sets + ) : + _name(name), + _description(description), + _required_instruction_sets(required_instruction_sets) + { + } +protected: + ~implementation() = default; + +private: + /** + * The name of this implementation. + */ + std::string_view _name; + + /** + * The description of this implementation. + */ + std::string_view _description; + + /** + * Instruction sets required for this implementation. + */ + const uint32_t _required_instruction_sets; +}; + +/** @private */ +namespace internal { + +/** + * The list of available implementations compiled into simdjson. + */ +class available_implementation_list { +public: + /** Get the list of available implementations compiled into simdjson */ + simdjson_inline available_implementation_list() {} + /** Number of implementations */ + size_t size() const noexcept; + /** STL const begin() iterator */ + const implementation * const *begin() const noexcept; + /** STL const end() iterator */ + const implementation * const *end() const noexcept; + + /** + * Get the implementation with the given name. + * + * Case sensitive. + * + * const implementation *impl = simdjson::get_available_implementations()["westmere"]; + * if (!impl) { exit(1); } + * if (!impl->supported_by_runtime_system()) { exit(1); } + * simdjson::get_active_implementation() = impl; + * + * @param name the implementation to find, e.g. "westmere", "haswell", "arm64" + * @return the implementation, or nullptr if no implementation with that name is available.
+ */ + const implementation * operator[](const std::string_view &name) const noexcept { + for (const implementation * impl : *this) { + if (impl->name() == name) { return impl; } + } + return nullptr; + } + + /** + * Detect the most advanced implementation supported by the current host. + * + * This is used to initialize the implementation on startup. + * + * const implementation *impl = simdjson::get_available_implementations().detect_best_supported(); + * simdjson::get_active_implementation() = impl; + * + * @return the most advanced supported implementation for the current host, or an + * implementation that returns UNSUPPORTED_ARCHITECTURE if there is no supported + * implementation. Will never return nullptr. + */ + const implementation *detect_best_supported() const noexcept; +}; + +} // namespace internal + +/** + * The list of available implementations compiled into simdjson. + */ +extern SIMDJSON_DLLIMPORTEXPORT const internal::available_implementation_list& get_available_implementations(); + +/** + * The active implementation. + * + * Automatically initialized on first use to the most advanced implementation supported by this hardware.
+ */ +extern SIMDJSON_DLLIMPORTEXPORT internal::atomic_ptr& get_active_implementation(); + +} // namespace simdjson + +#endif // SIMDJSON_IMPLEMENTATION_H +/* end file simdjson/implementation.h */ +/* including simdjson/minify.h: #include "simdjson/minify.h" */ +/* begin file simdjson/minify.h */ +#ifndef SIMDJSON_MINIFY_H +#define SIMDJSON_MINIFY_H + +/* skipped duplicate #include "simdjson/base.h" */ +/* including simdjson/padded_string.h: #include "simdjson/padded_string.h" */ +/* begin file simdjson/padded_string.h */ +#ifndef SIMDJSON_PADDED_STRING_H +#define SIMDJSON_PADDED_STRING_H + +/* skipped duplicate #include "simdjson/base.h" */ +/* skipped duplicate #include "simdjson/error.h" */ + +/* skipped duplicate #include "simdjson/error-inl.h" */ + +#include +#include +#include +#include + +namespace simdjson { + +class padded_string_view; + +/** + * String with extra allocation for ease of use with parser::parse() + * + * This is a move-only class, it cannot be copied. + */ +struct padded_string final { + + /** + * Create a new, empty padded string. + */ + explicit inline padded_string() noexcept; + /** + * Create a new padded string buffer. + * + * @param length the size of the string. + */ + explicit inline padded_string(size_t length) noexcept; + /** + * Create a new padded string by copying the given input. + * + * @param data the buffer to copy + * @param length the number of bytes to copy + */ + explicit inline padded_string(const char *data, size_t length) noexcept; +#ifdef __cpp_char8_t + explicit inline padded_string(const char8_t *data, size_t length) noexcept; +#endif + /** + * Create a new padded string by copying the given input. + * + * @param str_ the string to copy + */ + inline padded_string(const std::string & str_ ) noexcept; + /** + * Create a new padded string by copying the given input. + * + * @param sv_ the string to copy + */ + inline padded_string(std::string_view sv_) noexcept; + /** + * Move one padded string into another. 
+ * + * The original padded string will be reduced to zero capacity. + * + * @param o the string to move. + */ + inline padded_string(padded_string &&o) noexcept; + /** + * Move one padded string into another. + * + * The original padded string will be reduced to zero capacity. + * + * @param o the string to move. + */ + inline padded_string &operator=(padded_string &&o) noexcept; + inline void swap(padded_string &o) noexcept; + ~padded_string() noexcept; + + /** + * The length of the string. + * + * Does not include padding. + */ + size_t size() const noexcept; + + /** + * The length of the string. + * + * Does not include padding. + */ + size_t length() const noexcept; + + /** + * The string data. + **/ + const char *data() const noexcept; + const uint8_t *u8data() const noexcept { return static_cast(static_cast(data_ptr));} + + /** + * The string data. + **/ + char *data() noexcept; + + /** + * Create a std::string_view with the same content. + */ + operator std::string_view() const; + + /** + * Create a padded_string_view with the same content. + */ + operator padded_string_view() const noexcept; + + /** + * Load this padded string from a file. + * + * ## Windows and Unicode + * + * Windows users who need to read files with non-ANSI characters in the + * name should set their code page to UTF-8 (65001) before calling this + * function. This should be the default with Windows 11 and better. + * Further, they may use the AreFileApisANSI function to determine whether + * the filename is interpreted using the ANSI or the system default OEM + * codepage, and they may call SetFileApisToOEM accordingly. + * + * @return IO_ERROR on error. Be mindful that on some 32-bit systems, + * the file size might be limited to 2 GB. + * + * @param path the path to the file. 
+ **/ + inline static simdjson_result load(std::string_view path) noexcept; + +private: + padded_string &operator=(const padded_string &o) = delete; + padded_string(const padded_string &o) = delete; + + size_t viable_size{0}; + char *data_ptr{nullptr}; + +}; // padded_string + +/** + * Send padded_string instance to an output stream. + * + * @param out The output stream. + * @param s The padded_string instance. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, const padded_string& s) { return out << s.data(); } + +#if SIMDJSON_EXCEPTIONS +/** + * Send padded_string instance to an output stream. + * + * @param out The output stream. + * @param s The padded_string instance. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown). + */ +inline std::ostream& operator<<(std::ostream& out, simdjson_result &s) noexcept(false) { return out << s.value(); } +#endif + +} // namespace simdjson + +// This is deliberately outside of simdjson so that people get it without having to use the namespace +inline simdjson::padded_string operator "" _padded(const char *str, size_t len); +#ifdef __cpp_char8_t +inline simdjson::padded_string operator "" _padded(const char8_t *str, size_t len); +#endif + +namespace simdjson { +namespace internal { + +// The allocate_padded_buffer function is a low-level function to allocate memory +// with padding so we can read past the "length" bytes safely. It is used by +// the padded_string class automatically. It returns nullptr in case +// of error: the caller should check for a null pointer. +// The length parameter is the maximum size in bytes of the string. +// The caller is responsible to free the memory (e.g., delete[] (...)). 
+inline char *allocate_padded_buffer(size_t length) noexcept; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_PADDED_STRING_H +/* end file simdjson/padded_string.h */ +#include +#include +#include + +namespace simdjson { + +/** + * + * Minify the input string assuming that it represents a JSON string, does not parse or validate. + * This function is much faster than parsing a JSON string and then writing a minified version of it. + * However, it does not validate the input. It will merely return an error in simple cases (e.g., if + * there is a string that was never terminated). + * + * + * @param buf the json document to minify. + * @param len the length of the json document. + * @param dst the buffer to write the minified document to. *MUST* be allocated up to len bytes. + * @param dst_len the number of bytes written. Output only. + * @return the error code, or SUCCESS if there was no error. + */ +simdjson_warn_unused error_code minify(const char *buf, size_t len, char *dst, size_t &dst_len) noexcept; + +} // namespace simdjson + +#endif // SIMDJSON_MINIFY_H +/* end file simdjson/minify.h */ +/* skipped duplicate #include "simdjson/padded_string.h" */ +/* including simdjson/padded_string-inl.h: #include "simdjson/padded_string-inl.h" */ +/* begin file simdjson/padded_string-inl.h */ +#ifndef SIMDJSON_PADDED_STRING_INL_H +#define SIMDJSON_PADDED_STRING_INL_H + +/* skipped duplicate #include "simdjson/padded_string.h" */ +/* including simdjson/padded_string_view.h: #include "simdjson/padded_string_view.h" */ +/* begin file simdjson/padded_string_view.h */ +#ifndef SIMDJSON_PADDED_STRING_VIEW_H +#define SIMDJSON_PADDED_STRING_VIEW_H + +/* skipped duplicate #include "simdjson/portability.h" */ +/* skipped duplicate #include "simdjson/base.h" // for SIMDJSON_PADDING */ +/* skipped duplicate #include "simdjson/error.h" */ + +#include +#include +#include +#include + +namespace simdjson { + +/** + * User-provided string that promises it has 
extra padded bytes at the end for use with parser::parse(). + */ +class padded_string_view : public std::string_view { +private: + size_t _capacity; + +public: + /** Create an empty padded_string_view. */ + inline padded_string_view() noexcept = default; + + /** + * Promise the given buffer has at least SIMDJSON_PADDING extra bytes allocated to it. + * + * @param s The string. + * @param len The length of the string (not including padding). + * @param capacity The allocated length of the string, including padding. If the capacity is less + * than the length, the capacity will be set to the length. + */ + explicit inline padded_string_view(const char* s, size_t len, size_t capacity) noexcept; + /** overload explicit inline padded_string_view(const char* s, size_t len) noexcept */ + explicit inline padded_string_view(const uint8_t* s, size_t len, size_t capacity) noexcept; +#ifdef __cpp_char8_t + explicit inline padded_string_view(const char8_t* s, size_t len, size_t capacity) noexcept; +#endif + /** + * Promise the given string has at least SIMDJSON_PADDING extra bytes allocated to it. + * + * The capacity of the string will be used to determine its padding. + * + * @param s The string. + */ + explicit inline padded_string_view(const std::string &s) noexcept; + + /** + * Promise the given string_view has at least SIMDJSON_PADDING extra bytes allocated to it. + * + * @param s The string. + * @param capacity The allocated length of the string, including padding. If the capacity is less + * than the length, the capacity will be set to the length. + */ + explicit inline padded_string_view(std::string_view s, size_t capacity) noexcept; + + /** The number of allocated bytes. */ + inline size_t capacity() const noexcept; + + /** + * Remove the UTF-8 Byte Order Mark (BOM) if it exists. 
+ * + * @return whether a BOM was found and removed + */ + inline bool remove_utf8_bom() noexcept; + + /** The amount of padding on the string (capacity() - length()) */ + inline size_t padding() const noexcept; + +}; // padded_string_view + +#if SIMDJSON_EXCEPTIONS +/** + * Send padded_string instance to an output stream. + * + * @param out The output stream. + * @param s The padded_string_view. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown). + */ +inline std::ostream& operator<<(std::ostream& out, simdjson_result &s) noexcept(false); +#endif + +} // namespace simdjson + +#endif // SIMDJSON_PADDED_STRING_VIEW_H +/* end file simdjson/padded_string_view.h */ + +/* skipped duplicate #include "simdjson/error-inl.h" */ +/* including simdjson/padded_string_view-inl.h: #include "simdjson/padded_string_view-inl.h" */ +/* begin file simdjson/padded_string_view-inl.h */ +#ifndef SIMDJSON_PADDED_STRING_VIEW_INL_H +#define SIMDJSON_PADDED_STRING_VIEW_INL_H + +/* skipped duplicate #include "simdjson/padded_string_view.h" */ +/* skipped duplicate #include "simdjson/error-inl.h" */ + +#include /* memcmp */ + +namespace simdjson { + +inline padded_string_view::padded_string_view(const char* s, size_t len, size_t capacity) noexcept + : std::string_view(s, len), _capacity(capacity) +{ + if(_capacity < len) { _capacity = len; } +} + +inline padded_string_view::padded_string_view(const uint8_t* s, size_t len, size_t capacity) noexcept + : padded_string_view(reinterpret_cast(s), len, capacity) +{ +} +#ifdef __cpp_char8_t +inline padded_string_view::padded_string_view(const char8_t* s, size_t len, size_t capacity) noexcept + : padded_string_view(reinterpret_cast(s), len, capacity) +{ +} +#endif +inline padded_string_view::padded_string_view(const std::string &s) noexcept + : std::string_view(s), _capacity(s.capacity()) +{ +} + +inline 
padded_string_view::padded_string_view(std::string_view s, size_t capacity) noexcept + : std::string_view(s), _capacity(capacity) +{ + if(_capacity < s.length()) { _capacity = s.length(); } +} + +inline size_t padded_string_view::capacity() const noexcept { return _capacity; } + +inline size_t padded_string_view::padding() const noexcept { return capacity() - length(); } + +inline bool padded_string_view::remove_utf8_bom() noexcept { + if(length() < 3) { return false; } + if (std::memcmp(data(), "\xEF\xBB\xBF", 3) == 0) { + remove_prefix(3); + _capacity -= 3; + return true; + } + return false; +} + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson_result &s) noexcept(false) { return out << s.value(); } +#endif + +} // namespace simdjson + + +#endif // SIMDJSON_PADDED_STRING_VIEW_INL_H +/* end file simdjson/padded_string_view-inl.h */ + +#include + +namespace simdjson { +namespace internal { + +// The allocate_padded_buffer function is a low-level function to allocate memory +// with padding so we can read past the "length" bytes safely. It is used by +// the padded_string class automatically. It returns nullptr in case +// of error: the caller should check for a null pointer. +// The length parameter is the maximum size in bytes of the string. +// The caller is responsible to free the memory (e.g., delete[] (...)). 
+inline char *allocate_padded_buffer(size_t length) noexcept { + const size_t totalpaddedlength = length + SIMDJSON_PADDING; + if(totalpaddedlength(1UL<<20)) { + return nullptr; + } +#endif + + char *padded_buffer = new (std::nothrow) char[totalpaddedlength]; + if (padded_buffer == nullptr) { + return nullptr; + } + // We write nulls in the padded region to avoid having uninitialized + // content which may trigger warning for some sanitizers + std::memset(padded_buffer + length, 0, totalpaddedlength - length); + return padded_buffer; +} // allocate_padded_buffer() + +} // namespace internal + + +inline padded_string::padded_string() noexcept = default; +inline padded_string::padded_string(size_t length) noexcept + : viable_size(length), data_ptr(internal::allocate_padded_buffer(length)) { +} +inline padded_string::padded_string(const char *data, size_t length) noexcept + : viable_size(length), data_ptr(internal::allocate_padded_buffer(length)) { + if ((data != nullptr) && (data_ptr != nullptr)) { + std::memcpy(data_ptr, data, length); + } +} +#ifdef __cpp_char8_t +inline padded_string::padded_string(const char8_t *data, size_t length) noexcept + : viable_size(length), data_ptr(internal::allocate_padded_buffer(length)) { + if ((data != nullptr) && (data_ptr != nullptr)) { + std::memcpy(data_ptr, reinterpret_cast(data), length); + } +} +#endif +// note: do not pass std::string arguments by value +inline padded_string::padded_string(const std::string & str_ ) noexcept + : viable_size(str_.size()), data_ptr(internal::allocate_padded_buffer(str_.size())) { + if (data_ptr != nullptr) { + std::memcpy(data_ptr, str_.data(), str_.size()); + } +} +// note: do pass std::string_view arguments by value +inline padded_string::padded_string(std::string_view sv_) noexcept + : viable_size(sv_.size()), data_ptr(internal::allocate_padded_buffer(sv_.size())) { + if(simdjson_unlikely(!data_ptr)) { + //allocation failed or zero size + viable_size = 0; + return; + } + if (sv_.size()) { + 
std::memcpy(data_ptr, sv_.data(), sv_.size()); + } +} +inline padded_string::padded_string(padded_string &&o) noexcept + : viable_size(o.viable_size), data_ptr(o.data_ptr) { + o.data_ptr = nullptr; // we take ownership +} + +inline padded_string &padded_string::operator=(padded_string &&o) noexcept { + delete[] data_ptr; + data_ptr = o.data_ptr; + viable_size = o.viable_size; + o.data_ptr = nullptr; // we take ownership + o.viable_size = 0; + return *this; +} + +inline void padded_string::swap(padded_string &o) noexcept { + size_t tmp_viable_size = viable_size; + char *tmp_data_ptr = data_ptr; + viable_size = o.viable_size; + data_ptr = o.data_ptr; + o.data_ptr = tmp_data_ptr; + o.viable_size = tmp_viable_size; +} + +inline padded_string::~padded_string() noexcept { + delete[] data_ptr; +} + +inline size_t padded_string::size() const noexcept { return viable_size; } + +inline size_t padded_string::length() const noexcept { return viable_size; } + +inline const char *padded_string::data() const noexcept { return data_ptr; } + +inline char *padded_string::data() noexcept { return data_ptr; } + +inline padded_string::operator std::string_view() const { return std::string_view(data(), length()); } + +inline padded_string::operator padded_string_view() const noexcept { + return padded_string_view(data(), length(), length() + SIMDJSON_PADDING); +} + +inline simdjson_result padded_string::load(std::string_view filename) noexcept { + // Open the file + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + std::FILE *fp = std::fopen(filename.data(), "rb"); + SIMDJSON_POP_DISABLE_WARNINGS + + if (fp == nullptr) { + return IO_ERROR; + } + + // Get the file size + int ret; +#if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS + ret = _fseeki64(fp, 0, SEEK_END); +#else + ret = std::fseek(fp, 0, SEEK_END); +#endif // _WIN64 + if(ret < 0) { + std::fclose(fp); + return IO_ERROR; + } +#if 
SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS + __int64 llen = _ftelli64(fp); + if(llen == -1L) { + std::fclose(fp); + return IO_ERROR; + } +#else + long llen = std::ftell(fp); + if((llen < 0) || (llen == LONG_MAX)) { + std::fclose(fp); + return IO_ERROR; + } +#endif + + // Allocate the padded_string + size_t len = static_cast(llen); + padded_string s(len); + if (s.data() == nullptr) { + std::fclose(fp); + return MEMALLOC; + } + + // Read the padded_string + std::rewind(fp); + size_t bytes_read = std::fread(s.data(), 1, len, fp); + if (std::fclose(fp) != 0 || bytes_read != len) { + return IO_ERROR; + } + + return s; +} + +} // namespace simdjson + +inline simdjson::padded_string operator "" _padded(const char *str, size_t len) { + return simdjson::padded_string(str, len); +} +#ifdef __cpp_char8_t +inline simdjson::padded_string operator "" _padded(const char8_t *str, size_t len) { + return simdjson::padded_string(reinterpret_cast(str), len); +} +#endif +#endif // SIMDJSON_PADDED_STRING_INL_H +/* end file simdjson/padded_string-inl.h */ +/* skipped duplicate #include "simdjson/padded_string_view.h" */ +/* skipped duplicate #include "simdjson/padded_string_view-inl.h" */ + +/* including simdjson/dom.h: #include "simdjson/dom.h" */ +/* begin file simdjson/dom.h */ +#ifndef SIMDJSON_DOM_H +#define SIMDJSON_DOM_H + +/* including simdjson/dom/base.h: #include "simdjson/dom/base.h" */ +/* begin file simdjson/dom/base.h */ +#ifndef SIMDJSON_DOM_BASE_H +#define SIMDJSON_DOM_BASE_H + +/* skipped duplicate #include "simdjson/base.h" */ + +namespace simdjson { + +/** + * @brief A DOM API on top of the simdjson parser. + */ +namespace dom { + +/** The default batch size for parser.parse_many() and parser.load_many() */ +static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; +/** + * Some adversary might try to set the batch size to 0 or 1, which might cause problems. + * We set a minimum of 32B since anything else is highly likely to be an error. 
In practice, + * most users will want a much larger batch size. + * + * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON + * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. + */ +static constexpr size_t MINIMAL_BATCH_SIZE = 32; + +/** + * It is wasteful to allocate memory for tiny documents (e.g., 4 bytes). + */ +static constexpr size_t MINIMAL_DOCUMENT_CAPACITY = 32; + +class array; +class document; +class document_stream; +class element; +class key_value_pair; +class object; +class parser; + +#ifdef SIMDJSON_THREADS_ENABLED +struct stage1_worker; +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace dom + +namespace internal { + +template +class string_builder; +class tape_ref; + +} // namespace internal + +} // namespace simdjson + +#endif // SIMDJSON_DOM_BASE_H +/* end file simdjson/dom/base.h */ +/* including simdjson/dom/array.h: #include "simdjson/dom/array.h" */ +/* begin file simdjson/dom/array.h */ +#ifndef SIMDJSON_DOM_ARRAY_H +#define SIMDJSON_DOM_ARRAY_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* including simdjson/internal/tape_ref.h: #include "simdjson/internal/tape_ref.h" */ +/* begin file simdjson/internal/tape_ref.h */ +#ifndef SIMDJSON_INTERNAL_TAPE_REF_H +#define SIMDJSON_INTERNAL_TAPE_REF_H + +/* skipped duplicate #include "simdjson/base.h" */ + +namespace simdjson { +namespace dom { +class document; +} // namespace dom + +namespace internal { + +/** + * A reference to an element on the tape. Internal only. 
+ */ +class tape_ref { +public: + simdjson_inline tape_ref() noexcept; + simdjson_inline tape_ref(const dom::document *doc, size_t json_index) noexcept; + inline size_t after_element() const noexcept; + simdjson_inline tape_type tape_ref_type() const noexcept; + simdjson_inline uint64_t tape_value() const noexcept; + simdjson_inline bool is_double() const noexcept; + simdjson_inline bool is_int64() const noexcept; + simdjson_inline bool is_uint64() const noexcept; + simdjson_inline bool is_false() const noexcept; + simdjson_inline bool is_true() const noexcept; + simdjson_inline bool is_null_on_tape() const noexcept;// different name to avoid clash with is_null. + simdjson_inline uint32_t matching_brace_index() const noexcept; + simdjson_inline uint32_t scope_count() const noexcept; + template + simdjson_inline T next_tape_value() const noexcept; + simdjson_inline uint32_t get_string_length() const noexcept; + simdjson_inline const char * get_c_str() const noexcept; + inline std::string_view get_string_view() const noexcept; + simdjson_inline bool is_document_root() const noexcept; + simdjson_inline bool usable() const noexcept; + + /** The document this element references. */ + const dom::document *doc; + + /** The index of this element on `doc.tape[]` */ + size_t json_index; +}; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_TAPE_REF_H +/* end file simdjson/internal/tape_ref.h */ + +namespace simdjson { +namespace dom { + +/** + * JSON array. + */ +class array { +public: + /** Create a new, invalid array */ + simdjson_inline array() noexcept; + + class iterator { + public: + using value_type = element; + using difference_type = std::ptrdiff_t; + using pointer = void; + using reference = value_type; + using iterator_category = std::forward_iterator_tag; + + /** + * Get the actual value + */ + inline reference operator*() const noexcept; + /** + * Get the next value. + * + * Part of the std::iterator interface. 
+ */ + inline iterator& operator++() noexcept; + /** + * Get the next value. + * + * Part of the std::iterator interface. + */ + inline iterator operator++(int) noexcept; + /** + * Check if these values come from the same place in the JSON. + * + * Part of the std::iterator interface. + */ + inline bool operator!=(const iterator& other) const noexcept; + inline bool operator==(const iterator& other) const noexcept; + + inline bool operator<(const iterator& other) const noexcept; + inline bool operator<=(const iterator& other) const noexcept; + inline bool operator>=(const iterator& other) const noexcept; + inline bool operator>(const iterator& other) const noexcept; + + iterator() noexcept = default; + iterator(const iterator&) noexcept = default; + iterator& operator=(const iterator&) noexcept = default; + private: + simdjson_inline iterator(const internal::tape_ref &tape) noexcept; + internal::tape_ref tape; + friend class array; + }; + + /** + * Return the first array element. + * + * Part of the std::iterable interface. + */ + inline iterator begin() const noexcept; + /** + * One past the last array element. + * + * Part of the std::iterable interface. + */ + inline iterator end() const noexcept; + /** + * Get the size of the array (number of immediate children). + * It is a saturated value with a maximum of 0xFFFFFF: if the value + * is 0xFFFFFF then the size is 0xFFFFFF or greater. + */ + inline size_t size() const noexcept; + /** + * Get the total number of slots used by this array on the tape. + * + * Note that this is not the same thing as `size()`, which reports the + * number of actual elements within an array (not counting its children). + * + * Since an element can use 1 or 2 slots on the tape, you can only use this + * to figure out the total size of an array (including its children, + * recursively) if you know its structure ahead of time. 
+ **/ + inline size_t number_of_slots() const noexcept; + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * dom::parser parser; + * array a = parser.parse(R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded); + * a.at_pointer("/0/foo/a/1") == 20 + * a.at_pointer("0")["foo"]["a"].at(1) == 20 + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; + + /** + * Get the value at the given index. This function has linear-time complexity and + * is equivalent to the following: + * + * size_t i=0; + * for (auto element : *this) { + * if (i == index) { return element; } + * i++; + * } + * return INDEX_OUT_OF_BOUNDS; + * + * Avoid calling the at() function repeatedly. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + inline simdjson_result at(size_t index) const noexcept; + +private: + simdjson_inline array(const internal::tape_ref &tape) noexcept; + internal::tape_ref tape; + friend class element; + friend struct simdjson_result; + template + friend class simdjson::internal::string_builder; +}; + + +} // namespace dom + +/** The result of a JSON conversion that may fail. 
*/ +template<> +struct simdjson_result : public internal::simdjson_result_base { +public: + simdjson_inline simdjson_result() noexcept; ///< @private + simdjson_inline simdjson_result(dom::array value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + + inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; + inline simdjson_result at(size_t index) const noexcept; + +#if SIMDJSON_EXCEPTIONS + inline dom::array::iterator begin() const noexcept(false); + inline dom::array::iterator end() const noexcept(false); + inline size_t size() const noexcept(false); +#endif // SIMDJSON_EXCEPTIONS +}; + + + +} // namespace simdjson + +#if defined(__cpp_lib_ranges) +#include + +namespace std { +namespace ranges { +template<> +inline constexpr bool enable_view = true; +#if SIMDJSON_EXCEPTIONS +template<> +inline constexpr bool enable_view> = true; +#endif // SIMDJSON_EXCEPTIONS +} // namespace ranges +} // namespace std +#endif // defined(__cpp_lib_ranges) + +#endif // SIMDJSON_DOM_ARRAY_H +/* end file simdjson/dom/array.h */ +/* including simdjson/dom/document_stream.h: #include "simdjson/dom/document_stream.h" */ +/* begin file simdjson/dom/document_stream.h */ +#ifndef SIMDJSON_DOCUMENT_STREAM_H +#define SIMDJSON_DOCUMENT_STREAM_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* including simdjson/dom/parser.h: #include "simdjson/dom/parser.h" */ +/* begin file simdjson/dom/parser.h */ +#ifndef SIMDJSON_DOM_PARSER_H +#define SIMDJSON_DOM_PARSER_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* including simdjson/dom/document.h: #include "simdjson/dom/document.h" */ +/* begin file simdjson/dom/document.h */ +#ifndef SIMDJSON_DOM_DOCUMENT_H +#define SIMDJSON_DOM_DOCUMENT_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ + +#include + +namespace simdjson { +namespace dom { + +/** + * A parsed JSON document. 
+ * + * This class cannot be copied, only moved, to avoid unintended allocations. + */ +class document { +public: + /** + * Create a document container with zero capacity. + * + * The parser will allocate capacity as needed. + */ + document() noexcept = default; + ~document() noexcept = default; + + /** + * Take another document's buffers. + * + * @param other The document to take. Its capacity is zeroed and it is invalidated. + */ + document(document &&other) noexcept = default; + /** @private */ + document(const document &) = delete; // Disallow copying + /** + * Take another document's buffers. + * + * @param other The document to take. Its capacity is zeroed. + */ + document &operator=(document &&other) noexcept = default; + /** @private */ + document &operator=(const document &) = delete; // Disallow copying + + /** + * Get the root element of this document as a JSON array. + */ + element root() const noexcept; + + /** + * @private Dump the raw tape for debugging. + * + * @param os the stream to output to. + * @return false if the tape is likely wrong (e.g., you did not parse a valid JSON). + */ + bool dump_raw_tape(std::ostream &os) const noexcept; + + /** @private Structural values. */ + std::unique_ptr tape{}; + + /** @private String values. + * + * Should be at least byte_capacity. + */ + std::unique_ptr string_buf{}; + /** @private Allocate memory to support + * input JSON documents of up to len bytes. + * + * When calling this function, you lose + * all the data. + * + * The memory allocation is strict: you + * can you use this function to increase + * or lower the amount of allocated memory. + * Passsing zero clears the memory. + */ + error_code allocate(size_t len) noexcept; + /** @private Capacity in bytes, in terms + * of how many bytes of input JSON we can + * support. 
+ */ + size_t capacity() const noexcept; + + +private: + size_t allocated_capacity{0}; + friend class parser; +}; // class document + +} // namespace dom +} // namespace simdjson + +#endif // SIMDJSON_DOM_DOCUMENT_H +/* end file simdjson/dom/document.h */ + +namespace simdjson { + +namespace dom { + +/** + * A persistent document parser. + * + * The parser is designed to be reused, holding the internal buffers necessary to do parsing, + * as well as memory for a single document. The parsed document is overwritten on each parse. + * + * This class cannot be copied, only moved, to avoid unintended allocations. + * + * @note Moving a parser instance may invalidate "dom::element" instances. If you need to + * preserve both the "dom::element" instances and the parser, consider wrapping the parser + * instance in a std::unique_ptr instance: + * + * std::unique_ptr parser(new dom::parser{}); + * auto error = parser->load(f).get(root); + * + * You can then move std::unique_ptr safely. + * + * @note This is not thread safe: one parser cannot produce two documents at the same time! + */ +class parser { +public: + /** + * Create a JSON parser. + * + * The new parser will have zero capacity. + * + * @param max_capacity The maximum document length the parser can automatically handle. The parser + * will allocate more capacity on an as needed basis (when it sees documents too big to handle) + * up to this amount. The parser still starts with zero capacity no matter what this number is: + * to allocate an initial capacity, call allocate() after constructing the parser. + * Defaults to SIMDJSON_MAXSIZE_BYTES (the largest single document simdjson can process). + */ + simdjson_inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; + /** + * Take another parser's buffers and state. + * + * @param other The parser to take. Its capacity is zeroed. 
+ */ + simdjson_inline parser(parser &&other) noexcept; + parser(const parser &) = delete; ///< @private Disallow copying + /** + * Take another parser's buffers and state. + * + * @param other The parser to take. Its capacity is zeroed. + */ + simdjson_inline parser &operator=(parser &&other) noexcept; + parser &operator=(const parser &) = delete; ///< @private Disallow copying + + /** Deallocate the JSON parser. */ + ~parser()=default; + + /** + * Load a JSON document from a file and return a reference to it. + * + * dom::parser parser; + * const element doc = parser.load("jsonexamples/twitter.json"); + * + * The function is eager: the file's content is loaded in memory inside the parser instance + * and immediately parsed. The file can be deleted after the `parser.load` call. + * + * ### IMPORTANT: Document Lifetime + * + * The JSON document still lives in the parser: this is the most efficient way to parse JSON + * documents because it reuses the same buffers, but you *must* use the document before you + * destroy the parser or call parse() again. + * + * Moving the parser instance is safe, but it invalidates the element instances. You may store + * the parser instance without moving it by wrapping it inside an `unique_ptr` instance like + * so: `std::unique_ptr parser(new dom::parser{});`. + * + * ### Parser Capacity + * + * If the parser's current capacity is less than the file length, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * ## Windows and Unicode + * + * Windows users who need to read files with non-ANSI characters in the + * name should set their code page to UTF-8 (65001) before calling this + * function. This should be the default with Windows 11 and better. + * Further, they may use the AreFileApisANSI function to determine whether + * the filename is interpreted using the ANSI or the system default OEM + * codepage, and they may call SetFileApisToOEM accordingly. + * + * @param path The path to load. 
+ * @return The document, or an error: + * - IO_ERROR if there was an error opening or reading the file. + * Be mindful that on some 32-bit systems, + * the file size might be limited to 2 GB. + * - MEMALLOC if the parser does not have enough capacity and memory allocation fails. + * - CAPACITY if the parser does not have enough capacity and len > max_capacity. + * - other json errors if parsing fails. You should not rely on these errors to always the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). + */ + inline simdjson_result load(const std::string &path) & noexcept; + inline simdjson_result load(const std::string &path) && = delete ; + + /** + * Load a JSON document from a file into a provide document instance and return a temporary reference to it. + * It is similar to the function `load` except that instead of parsing into the internal + * `document` instance associated with the parser, it allows the user to provide a document + * instance. + * + * dom::parser parser; + * dom::document doc; + * element doc_root = parser.load_into_document(doc, "jsonexamples/twitter.json"); + * + * The function is eager: the file's content is loaded in memory inside the parser instance + * and immediately parsed. The file can be deleted after the `parser.load_into_document` call. + * + * ### IMPORTANT: Document Lifetime + * + * After the call to load_into_document, the parser is no longer needed. + * + * The JSON document lives in the document instance: you must keep the document + * instance alive while you navigate through it (i.e., used the returned value from + * load_into_document). You are encourage to reuse the document instance + * many times with new data to avoid reallocations: + * + * dom::document doc; + * element doc_root1 = parser.load_into_document(doc, "jsonexamples/twitter.json"); + * //... 
doc_root1 is a pointer inside doc + * element doc_root2 = parser.load_into_document(doc, "jsonexamples/twitter.json"); + * //... doc_root2 is a pointer inside doc + * // at this point doc_root1 is no longer safe + * + * Moving the document instance is safe, but it invalidates the element instances. After + * moving a document, you can recover safe access to the document root with its `root()` method. + * + * @param doc The document instance where the parsed data will be stored (on success). + * @param path The path to load. + * @return The document, or an error: + * - IO_ERROR if there was an error opening or reading the file. + * Be mindful that on some 32-bit systems, + * the file size might be limited to 2 GB. + * - MEMALLOC if the parser does not have enough capacity and memory allocation fails. + * - CAPACITY if the parser does not have enough capacity and len > max_capacity. + * - other json errors if parsing fails. You should not rely on these errors to always the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). + */ + inline simdjson_result load_into_document(document& doc, const std::string &path) & noexcept; + inline simdjson_result load_into_document(document& doc, const std::string &path) && =delete; + + /** + * Parse a JSON document and return a temporary reference to it. + * + * dom::parser parser; + * element doc_root = parser.parse(buf, len); + * + * The function eagerly parses the input: the input can be modified and discarded after + * the `parser.parse(buf, len)` call has completed. + * + * ### IMPORTANT: Document Lifetime + * + * The JSON document still lives in the parser: this is the most efficient way to parse JSON + * documents because it reuses the same buffers, but you *must* use the document before you + * destroy the parser or call parse() again. + * + * Moving the parser instance is safe, but it invalidates the element instances. 
You may store + * the parser instance without moving it by wrapping it inside a `unique_ptr` instance like + * so: `std::unique_ptr<dom::parser> parser(new dom::parser{});`. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * If realloc_if_needed is true (the default), it is assumed that the buffer does *not* have enough padding, + * and it is copied into an enlarged temporary buffer before parsing. Thus the following is safe: + * + * const char *json = R"({"key":"value"})"; + * const size_t json_len = std::strlen(json); + * simdjson::dom::parser parser; + * simdjson::dom::element element = parser.parse(json, json_len); + * + * If you set realloc_if_needed to false (e.g., parser.parse(json, json_len, false)), + * you must provide a buffer with at least SIMDJSON_PADDING extra bytes at the end. + * The benefit of setting realloc_if_needed to false is that you avoid a temporary + * memory allocation and a copy. + * + * The padded bytes may be read. It is not important how you initialize + * these bytes though we recommend a sensible default like null character values or spaces.
+ * For example, the following low-level code is safe: + * + * const char *json = R"({"key":"value"})"; + * const size_t json_len = std::strlen(json); + * std::unique_ptr<char[]> padded_json_copy{new char[json_len + SIMDJSON_PADDING]}; + * std::memcpy(padded_json_copy.get(), json, json_len); + * std::memset(padded_json_copy.get() + json_len, '\0', SIMDJSON_PADDING); + * simdjson::dom::parser parser; + * simdjson::dom::element element = parser.parse(padded_json_copy.get(), json_len, false); + * + * ### Parser Capacity + * + * If the parser's current capacity is less than len, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless + * realloc_if_needed is true. + * @param len The length of the JSON. + * @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding. + * @return An element pointing at the root of the document, or an error: + * - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity, + * and memory allocation fails. + * - CAPACITY if the parser does not have enough capacity and len > max_capacity. + * - other json errors if parsing fails. You should not rely on these errors to always be the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
+ */ + inline simdjson_result parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) & noexcept; + inline simdjson_result parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) && =delete; + /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ + simdjson_inline simdjson_result parse(const char *buf, size_t len, bool realloc_if_needed = true) & noexcept; + simdjson_inline simdjson_result parse(const char *buf, size_t len, bool realloc_if_needed = true) && =delete; + /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ + simdjson_inline simdjson_result parse(const std::string &s) & noexcept; + simdjson_inline simdjson_result parse(const std::string &s) && =delete; + /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ + simdjson_inline simdjson_result parse(const padded_string &s) & noexcept; + simdjson_inline simdjson_result parse(const padded_string &s) && =delete; + /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ + simdjson_inline simdjson_result parse(const padded_string_view &v) & noexcept; + simdjson_inline simdjson_result parse(const padded_string_view &v) && =delete; + + /** @private We do not want to allow implicit conversion from C string to std::string. */ + simdjson_inline simdjson_result parse(const char *buf) noexcept = delete; + + /** + * Parse a JSON document into a provide document instance and return a temporary reference to it. + * It is similar to the function `parse` except that instead of parsing into the internal + * `document` instance associated with the parser, it allows the user to provide a document + * instance. + * + * dom::parser parser; + * dom::document doc; + * element doc_root = parser.parse_into_document(doc, buf, len); + * + * The function eagerly parses the input: the input can be modified and discarded after + * the `parser.parse(buf, len)` call has completed. 
+ * + * ### IMPORTANT: Document Lifetime + * + * After the call to parse_into_document, the parser is no longer needed. + * + * The JSON document lives in the document instance: you must keep the document + * instance alive while you navigate through it (i.e., use the returned value from + * parse_into_document). You are encouraged to reuse the document instance + * many times with new data to avoid reallocations: + * + * dom::document doc; + * element doc_root1 = parser.parse_into_document(doc, buf1, len); + * //... doc_root1 is a pointer inside doc + * element doc_root2 = parser.parse_into_document(doc, buf1, len); + * //... doc_root2 is a pointer inside doc + * // at this point doc_root1 is no longer safe + * + * Moving the document instance is safe, but it invalidates the element instances. After + * moving a document, you can recover safe access to the document root with its `root()` method. + * + * @param doc The document instance where the parsed data will be stored (on success). + * @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless + * realloc_if_needed is true. + * @param len The length of the JSON. + * @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding. + * @return An element pointing at the root of the document, or an error: + * - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity, + * and memory allocation fails. + * - CAPACITY if the parser does not have enough capacity and len > max_capacity. + * - other json errors if parsing fails. You should not rely on these errors to always be the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
+ */ + inline simdjson_result parse_into_document(document& doc, const uint8_t *buf, size_t len, bool realloc_if_needed = true) & noexcept; + inline simdjson_result parse_into_document(document& doc, const uint8_t *buf, size_t len, bool realloc_if_needed = true) && =delete; + /** @overload parse_into_document(const uint8_t *buf, size_t len, bool realloc_if_needed) */ + simdjson_inline simdjson_result parse_into_document(document& doc, const char *buf, size_t len, bool realloc_if_needed = true) & noexcept; + simdjson_inline simdjson_result parse_into_document(document& doc, const char *buf, size_t len, bool realloc_if_needed = true) && =delete; + /** @overload parse_into_document(const uint8_t *buf, size_t len, bool realloc_if_needed) */ + simdjson_inline simdjson_result parse_into_document(document& doc, const std::string &s) & noexcept; + simdjson_inline simdjson_result parse_into_document(document& doc, const std::string &s) && =delete; + /** @overload parse_into_document(const uint8_t *buf, size_t len, bool realloc_if_needed) */ + simdjson_inline simdjson_result parse_into_document(document& doc, const padded_string &s) & noexcept; + simdjson_inline simdjson_result parse_into_document(document& doc, const padded_string &s) && =delete; + + /** @private We do not want to allow implicit conversion from C string to std::string. */ + simdjson_inline simdjson_result parse_into_document(document& doc, const char *buf) noexcept = delete; + + /** + * Load a file containing many JSON documents. + * + * dom::parser parser; + * for (const element doc : parser.load_many(path)) { + * cout << std::string(doc["title"]) << endl; + * } + * + * The file is loaded in memory and can be safely deleted after the `parser.load_many(path)` + * function has returned. The memory is held by the `parser` instance. + * + * The function is lazy: it may be that no more than one JSON document at a time is parsed. 
+ * And, possibly, no document may have been parsed when the `parser.load_many(path)` function + * returned. + * + * If there is a UTF-8 BOM, the parser skips it. + * + * ### Format + * + * The file must contain a series of one or more JSON documents, concatenated into a single + * buffer, separated by whitespace. It effectively parses until it has a fully valid document, + * then starts parsing the next document at that point. (It does this with more parallelism and + * lookahead than you might think, though.) + * + * Documents that consist of an object or array may omit the whitespace between them, concatenating + * with no separator. Documents that consist of a single primitive (i.e. documents that are not + * arrays or objects) MUST be separated with whitespace. + * + * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. + * Setting batch_size to excessively large or excessively small values may negatively impact + * performance. + * + * ### Error Handling + * + * All errors are returned during iteration: if there is a global error such as memory allocation, + * it will be yielded as the first result. Iteration always stops after the first error. + * + * As with all other simdjson methods, non-exception error handling is readily available through + * the same interface, requiring you to check the error before using the document: + * + * dom::parser parser; + * dom::document_stream docs; + * auto error = parser.load_many(path).get(docs); + * if (error) { cerr << error << endl; exit(1); } + * for (auto doc : docs) { + * std::string_view title; + * if ((error = doc["title"].get(title))) { cerr << error << endl; exit(1); } + * cout << title << endl; + * } + * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead.
+ * + * ### Parser Capacity + * + * If the parser's current capacity is less than batch_size, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param path File name pointing at the concatenated JSON to parse. + * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet + * spot is cache-related: small enough to fit in cache, yet big enough to + * parse as many documents as possible in one tight loop. + * Defaults to 1MB (as simdjson::dom::DEFAULT_BATCH_SIZE), which has been a reasonable sweet + * spot in our tests. + * If you set the batch_size to a value smaller than simdjson::dom::MINIMAL_BATCH_SIZE + * (currently 32B), it will be replaced by simdjson::dom::MINIMAL_BATCH_SIZE. + * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: + * - IO_ERROR if there was an error opening or reading the file. + * - MEMALLOC if the parser does not have enough capacity and memory allocation fails. + * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. + * - other json errors if parsing fails. You should not rely on these errors to always the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). + */ + inline simdjson_result load_many(const std::string &path, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; + + /** + * Parse a buffer containing many JSON documents. + * + * dom::parser parser; + * for (element doc : parser.parse_many(buf, len)) { + * cout << std::string(doc["title"]) << endl; + * } + * + * No copy of the input buffer is made. + * + * The function is lazy: it may be that no more than one JSON document at a time is parsed. + * And, possibly, no document many have been parsed when the `parser.load_many(path)` function + * returned. 
+ * + * The caller is responsible for ensuring that the input string data remains unchanged and is + * not deleted during the loop. In particular, the following is unsafe and will not compile: + * + * auto docs = parser.parse_many("[\"temporary data\"]"_padded); + * // here the string "[\"temporary data\"]" may no longer exist in memory + * // the parser instance may not have even accessed the input yet + * for (element doc : docs) { + * cout << std::string(doc["title"]) << endl; + * } + * + * The following is safe: + * + * auto json = "[\"temporary data\"]"_padded; + * auto docs = parser.parse_many(json); + * for (element doc : docs) { + * cout << std::string(doc["title"]) << endl; + * } + * + * If there is a UTF-8 BOM, the parser skips it. + * + * ### Format + * + * The buffer must contain a series of one or more JSON documents, concatenated into a single + * buffer, separated by whitespace. It effectively parses until it has a fully valid document, + * then starts parsing the next document at that point. (It does this with more parallelism and + * lookahead than you might think, though.) + * + * Documents that consist of an object or array may omit the whitespace between them, concatenating + * with no separator. Documents that consist of a single primitive (i.e. documents that are not + * arrays or objects) MUST be separated with whitespace. + * + * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. + * Setting batch_size to excessively large or excessively small values may negatively impact + * performance. + * + * ### Error Handling + * + * All errors are returned during iteration: if there is a global error such as memory allocation, + * it will be yielded as the first result. Iteration always stops after the first error.
+ * + * As with all other simdjson methods, non-exception error handling is readily available through + * the same interface, requiring you to check the error before using the document: + * + * dom::parser parser; + * dom::document_stream docs; + * auto error = parser.parse_many(buf, len).get(docs); + * if (error) { cerr << error << endl; exit(1); } + * for (auto doc : docs) { + * std::string_view title; + * if ((error = doc["title"].get(title))) { cerr << error << endl; exit(1); } + * cout << title << endl; + * } + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * + * ### Parser Capacity + * + * If the parser's current capacity is less than batch_size, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param buf The concatenated JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes. + * @param len The length of the concatenated JSON. + * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet + * spot is cache-related: small enough to fit in cache, yet big enough to + * parse as many documents as possible in one tight loop. + * Defaults to 1MB (as simdjson::dom::DEFAULT_BATCH_SIZE), which has been a reasonable sweet spot in our tests. + * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: + * - MEMALLOC if the parser does not have enough capacity and memory allocation fails. + * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity.
+ * - other json errors if parsing fails. You should not rely on these errors to always the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). + */ + inline simdjson_result parse_many(const uint8_t *buf, size_t len, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result parse_many(const char *buf, size_t len, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result parse_many(const std::string &s, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; + inline simdjson_result parse_many(const std::string &&s, size_t batch_size) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result parse_many(const padded_string &s, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; + inline simdjson_result parse_many(const padded_string &&s, size_t batch_size) = delete;// unsafe + + /** @private We do not want to allow implicit conversion from C string to std::string. */ + simdjson_result parse_many(const char *buf, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept = delete; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return The error, if there is one. + */ + simdjson_warn_unused inline error_code allocate(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept; + +#ifndef SIMDJSON_DISABLE_DEPRECATED_API + /** + * @private deprecated because it returns bool instead of error_code, which is our standard for + * failures. Use allocate() instead. 
+ * + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return true if successful, false if allocation failed. + */ + [[deprecated("Use allocate() instead.")]] + simdjson_warn_unused inline bool allocate_capacity(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept; +#endif // SIMDJSON_DISABLE_DEPRECATED_API + /** + * The largest document this parser can support without reallocating. + * + * @return Current capacity, in bytes. + */ + simdjson_inline size_t capacity() const noexcept; + + /** + * The largest document this parser can automatically support. + * + * The parser may reallocate internal buffers as needed up to this amount. + * + * @return Maximum capacity, in bytes. + */ + simdjson_inline size_t max_capacity() const noexcept; + + /** + * The maximum level of nested object and arrays supported by this parser. + * + * @return Maximum depth, in bytes. + */ + simdjson_inline size_t max_depth() const noexcept; + + /** + * Set max_capacity. This is the largest document this parser can automatically support. + * + * The parser may reallocate internal buffers as needed up to this amount as documents are passed + * to it. + * + * Note: To avoid limiting the memory to an absurd value, such as zero or two bytes, + * iff you try to set max_capacity to a value lower than MINIMAL_DOCUMENT_CAPACITY, + * then the maximal capacity is set to MINIMAL_DOCUMENT_CAPACITY. + * + * This call will not allocate or deallocate, even if capacity is currently above max_capacity. + * + * @param max_capacity The new maximum capacity, in bytes. + */ + simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; + +#ifdef SIMDJSON_THREADS_ENABLED + /** + * The parser instance can use threads when they are available to speed up some + * operations. It is enabled by default. 
Changing this attribute will change the + * behavior of the parser for future operations. + */ + bool threaded{true}; +#endif + /** @private Use the new DOM API instead */ + class Iterator; + /** @private Use simdjson_error instead */ + using InvalidJSON [[deprecated("Use simdjson_error instead")]] = simdjson_error; + + /** @private [for benchmarking access] The implementation to use */ + std::unique_ptr implementation{}; + + /** @private Use `if (parser.parse(...).error())` instead */ + bool valid{false}; + /** @private Use `parser.parse(...).error()` instead */ + error_code error{UNINITIALIZED}; + + /** @private Use `parser.parse(...).value()` instead */ + document doc{}; + + /** @private returns true if the document parsed was valid */ + [[deprecated("Use the result of parser.parse() instead")]] + inline bool is_valid() const noexcept; + + /** + * @private return an error code corresponding to the last parsing attempt, see + * simdjson.h will return UNINITIALIZED if no parsing was attempted + */ + [[deprecated("Use the result of parser.parse() instead")]] + inline int get_error_code() const noexcept; + + /** @private return the string equivalent of "get_error_code" */ + [[deprecated("Use error_message() on the result of parser.parse() instead, or cout << error")]] + inline std::string get_error_message() const noexcept; + + /** @private */ + [[deprecated("Use cout << on the result of parser.parse() instead")]] + inline bool print_json(std::ostream &os) const noexcept; + + /** @private Private and deprecated: use `parser.parse(...).doc.dump_raw_tape()` instead */ + inline bool dump_raw_tape(std::ostream &os) const noexcept; + + +private: + /** + * The maximum document length this parser will automatically support. + * + * The parser will not be automatically allocated above this amount. + */ + size_t _max_capacity; + + /** + * The loaded buffer (reused each time load() is called) + */ + std::unique_ptr loaded_bytes; + + /** Capacity of loaded_bytes buffer. 
*/ + size_t _loaded_bytes_capacity{0}; + + // all nodes are stored on the doc.tape using a 64-bit word. + // + // strings, double and ints are stored as + // a 64-bit word with a pointer to the actual value + // + // + // + // for objects or arrays, store [ or { at the beginning and } and ] at the + // end. For the openings ([ or {), we annotate them with a reference to the + // location on the doc.tape of the end, and for then closings (} and ]), we + // annotate them with a reference to the location of the opening + // + // + + /** + * Ensure we have enough capacity to handle at least desired_capacity bytes, + * and auto-allocate if not. This also allocates memory if needed in the + * internal document. + */ + inline error_code ensure_capacity(size_t desired_capacity) noexcept; + /** + * Ensure we have enough capacity to handle at least desired_capacity bytes, + * and auto-allocate if not. This also allocates memory if needed in the + * provided document. + */ + inline error_code ensure_capacity(document& doc, size_t desired_capacity) noexcept; + + /** Read the file into loaded_bytes */ + inline simdjson_result read_file(const std::string &path) noexcept; + + friend class parser::Iterator; + friend class document_stream; + + +}; // class parser + +} // namespace dom +} // namespace simdjson + +#endif // SIMDJSON_DOM_PARSER_H +/* end file simdjson/dom/parser.h */ + +#ifdef SIMDJSON_THREADS_ENABLED +#include +#include +#include +#endif + +namespace simdjson { +namespace dom { + +#ifdef SIMDJSON_THREADS_ENABLED +/** @private Custom worker class **/ +struct stage1_worker { + stage1_worker() noexcept = default; + stage1_worker(const stage1_worker&) = delete; + stage1_worker(stage1_worker&&) = delete; + stage1_worker operator=(const stage1_worker&) = delete; + ~stage1_worker(); + /** + * We only start the thread when it is needed, not at object construction, this may throw. + * You should only call this once. + **/ + void start_thread(); + /** + * Start a stage 1 job. 
You should first call 'run', then 'finish'. + * You must call start_thread once before. + */ + void run(document_stream * ds, dom::parser * stage1, size_t next_batch_start); + /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ + void finish(); + +private: + + /** + * Normally, we would never stop the thread. But we do in the destructor. + * This function is only safe assuming that you are not waiting for results. You + * should have called run, then finish, and be done. + **/ + void stop_thread(); + + std::thread thread{}; + /** These three variables define the work done by the thread. **/ + dom::parser * stage1_thread_parser{}; + size_t _next_batch_start{}; + document_stream * owner{}; + /** + * We have two state variables. This could be streamlined to one variable in the future but + * we use two for clarity. + */ + bool has_work{false}; + bool can_work{true}; + + /** + * We lock using a mutex. + */ + std::mutex locking_mutex{}; + std::condition_variable cond_var{}; +}; +#endif + +/** + * A forward-only stream of documents. + * + * Produced by parser::parse_many. + * + */ +class document_stream { +public: + /** + * Construct an uninitialized document_stream. + * + * ```c++ + * document_stream docs; + * error = parser.parse_many(json).get(docs); + * ``` + */ + simdjson_inline document_stream() noexcept; + /** Move one document_stream to another. */ + simdjson_inline document_stream(document_stream &&other) noexcept = default; + /** Move one document_stream to another. */ + simdjson_inline document_stream &operator=(document_stream &&other) noexcept = default; + + simdjson_inline ~document_stream() noexcept; + /** + * Returns the input size in bytes. + */ + inline size_t size_in_bytes() const noexcept; + /** + * After iterating through the stream, this method + * returns the number of bytes that were not parsed at the end + * of the stream. 
If truncated_bytes() differs from zero, + * then the input was truncated maybe because incomplete JSON + * documents were found at the end of the stream. You + * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). + * + * You should only call truncated_bytes() after streaming through all + * documents, like so: + * + * document_stream stream = parser.parse_many(json,window); + * for(auto doc : stream) { + * // do something with doc + * } + * size_t truncated = stream.truncated_bytes(); + * + */ + inline size_t truncated_bytes() const noexcept; + /** + * An iterator through a forward-only stream of documents. + */ + class iterator { + public: + using value_type = simdjson_result; + using reference = value_type; + + using difference_type = std::ptrdiff_t; + + using iterator_category = std::input_iterator_tag; + + /** + * Default constructor. + */ + simdjson_inline iterator() noexcept; + /** + * Get the current document (or error). + */ + simdjson_inline reference operator*() noexcept; + /** + * Advance to the next document (prefix). + */ + inline iterator& operator++() noexcept; + /** + * Check if we're at the end yet. + * @param other the end iterator to compare to. + */ + simdjson_inline bool operator!=(const iterator &other) const noexcept; + /** + * @private + * + * Gives the current index in the input document in bytes. + * + * document_stream stream = parser.parse_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * auto doc = *i; + * size_t index = i.current_index(); + * } + * + * This function (current_index()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + */ + simdjson_inline size_t current_index() const noexcept; + /** + * @private + * + * Gives a view of the current document. 
+ * + * document_stream stream = parser.parse_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * auto doc = *i; + * std::string_view v = i->source(); + * } + * + * The returned string_view instance is simply a map to the (unparsed) + * source string: it may thus include white-space characters and all manner + * of padding. + * + * This function (source()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + */ + simdjson_inline std::string_view source() const noexcept; + + private: + simdjson_inline iterator(document_stream *s, bool finished) noexcept; + /** The document_stream we're iterating through. */ + document_stream* stream; + /** Whether we're finished or not. */ + bool finished; + friend class document_stream; + }; + + /** + * Start iterating the documents in the stream. + */ + simdjson_inline iterator begin() noexcept; + /** + * The end of the stream, for iterator comparison purposes. + */ + simdjson_inline iterator end() noexcept; + +private: + + document_stream &operator=(const document_stream &) = delete; // Disallow copying + document_stream(const document_stream &other) = delete; // Disallow copying + + /** + * Construct a document_stream. Does not allocate or parse anything until the iterator is + * used. + * + * @param parser is a reference to the parser instance used to generate this document_stream + * @param buf is the raw byte buffer we need to process + * @param len is the length of the raw byte buffer in bytes + * @param batch_size is the size of the windows (must be strictly greater or equal to the largest JSON document) + */ + simdjson_inline document_stream( + dom::parser &parser, + const uint8_t *buf, + size_t len, + size_t batch_size + ) noexcept; + + /** + * Parse the first document in the buffer. Used by begin(), to handle allocation and + * initialization. 
+ */ + inline void start() noexcept; + + /** + * Parse the next document found in the buffer previously given to document_stream. + * + * The content should be a valid JSON document encoded as UTF-8. If there is a + * UTF-8 BOM, the parser skips it. + * + * You do NOT need to pre-allocate a parser. This function takes care of + * pre-allocating a capacity defined by the batch_size defined when creating the + * document_stream object. + * + * The function returns simdjson::EMPTY if there is no more data to be parsed. + * + * The function returns simdjson::SUCCESS (as integer = 0) in case of success + * and indicates that the buffer has successfully been parsed to the end. + * Every document it contained has been parsed without error. + * + * The function returns an error code from simdjson/simdjson.h in case of failure + * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth; + * the simdjson::error_message function converts these error codes into a string). + * + * You can also check validity by calling parser.is_valid(). The same parser can + * and should be reused for the other documents in the buffer. + */ + inline void next() noexcept; + + /** + * Pass the next batch through stage 1 and return when finished. + * When threads are enabled, this may wait for the stage 1 thread to finish. + */ + inline void load_batch() noexcept; + + /** Get the next document index. */ + inline size_t next_batch_start() const noexcept; + + /** Pass the next batch through stage 1 with the given parser. */ + inline error_code run_stage1(dom::parser &p, size_t batch_start) noexcept; + + dom::parser *parser; + const uint8_t *buf; + size_t len; + size_t batch_size; + /** The error (or lack thereof) from the current document. */ + error_code error; + size_t batch_start{0}; + size_t doc_index{}; +#ifdef SIMDJSON_THREADS_ENABLED + /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. 
*/ + bool use_thread; + + inline void load_from_stage1_thread() noexcept; + + /** Start a thread to run stage 1 on the next batch. */ + inline void start_stage1_thread() noexcept; + + /** Wait for the stage 1 thread to finish and capture the results. */ + inline void finish_stage1_thread() noexcept; + + /** The error returned from the stage 1 thread. */ + error_code stage1_thread_error{UNINITIALIZED}; + /** The thread used to run stage 1 against the next batch in the background. */ + friend struct stage1_worker; + std::unique_ptr worker{new(std::nothrow) stage1_worker()}; + /** + * The parser used to run stage 1 in the background. Will be swapped + * with the regular parser when finished. + */ + dom::parser stage1_thread_parser{}; +#endif // SIMDJSON_THREADS_ENABLED + + friend class dom::parser; + friend struct simdjson_result; + friend struct internal::simdjson_result_base; + +}; // class document_stream + +} // namespace dom + +template<> +struct simdjson_result : public internal::simdjson_result_base { +public: + simdjson_inline simdjson_result() noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result(dom::document_stream &&value) noexcept; ///< @private + +#if SIMDJSON_EXCEPTIONS + simdjson_inline dom::document_stream::iterator begin() noexcept(false); + simdjson_inline dom::document_stream::iterator end() noexcept(false); +#else // SIMDJSON_EXCEPTIONS +#ifndef SIMDJSON_DISABLE_DEPRECATED_API + [[deprecated("parse_many() and load_many() may return errors. Use document_stream stream; error = parser.parse_many().get(doc); instead.")]] + simdjson_inline dom::document_stream::iterator begin() noexcept; + [[deprecated("parse_many() and load_many() may return errors. 
Use document_stream stream; error = parser.parse_many().get(doc); instead.")]] + simdjson_inline dom::document_stream::iterator end() noexcept; +#endif // SIMDJSON_DISABLE_DEPRECATED_API +#endif // SIMDJSON_EXCEPTIONS +}; // struct simdjson_result + +} // namespace simdjson + +#endif // SIMDJSON_DOCUMENT_STREAM_H +/* end file simdjson/dom/document_stream.h */ +/* skipped duplicate #include "simdjson/dom/document.h" */ +/* including simdjson/dom/element.h: #include "simdjson/dom/element.h" */ +/* begin file simdjson/dom/element.h */ +#ifndef SIMDJSON_DOM_ELEMENT_H +#define SIMDJSON_DOM_ELEMENT_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* skipped duplicate #include "simdjson/dom/array.h" */ + +namespace simdjson { +namespace dom { + +/** + * The actual concrete type of a JSON element + * This is the type it is most easily cast to with get<>. + */ +enum class element_type { + ARRAY = '[', ///< dom::array + OBJECT = '{', ///< dom::object + INT64 = 'l', ///< int64_t + UINT64 = 'u', ///< uint64_t: any integer that fits in uint64_t but *not* int64_t + DOUBLE = 'd', ///< double: Any number with a "." or "e" that fits in double. + STRING = '"', ///< std::string_view + BOOL = 't', ///< bool + NULL_VALUE = 'n' ///< null +}; + +/** + * A JSON element. + * + * References an element in a JSON document, representing a JSON null, boolean, string, number, + * array or object. + */ +class element { +public: + /** Create a new, invalid element. */ + simdjson_inline element() noexcept; + + /** The type of this element. */ + simdjson_inline element_type type() const noexcept; + + /** + * Cast this element to an array. + * + * @returns An object that can be used to iterate the array, or: + * INCORRECT_TYPE if the JSON element is not an array. + */ + inline simdjson_result get_array() const noexcept; + /** + * Cast this element to an object. 
+ * + * @returns An object that can be used to look up or iterate the object's fields, or: + * INCORRECT_TYPE if the JSON element is not an object. + */ + inline simdjson_result get_object() const noexcept; + /** + * Cast this element to a null-terminated C string. + * + * The string is guaranteed to be valid UTF-8. + * + * The length of the string is given by get_string_length(). Because JSON strings + * may contain null characters, it may be incorrect to use strlen to determine the + * string length. + * + * It is possible to get a single string_view instance which represents both the string + * content and its length: see get_string(). + * + * @returns A pointer to a null-terminated UTF-8 string. This string is stored in the parser and will + * be invalidated the next time it parses a document or when it is destroyed. + * Returns INCORRECT_TYPE if the JSON element is not a string. + */ + inline simdjson_result get_c_str() const noexcept; + /** + * Gives the length in bytes of the string. + * + * It is possible to get a single string_view instance which represents both the string + * content and its length: see get_string(). + * + * @returns A string length in bytes. + * Returns INCORRECT_TYPE if the JSON element is not a string. + */ + inline simdjson_result get_string_length() const noexcept; + /** + * Cast this element to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * @returns A UTF-8 string. The string is stored in the parser and will be invalidated the next time it + * parses a document or when it is destroyed. + * Returns INCORRECT_TYPE if the JSON element is not a string. + */ + inline simdjson_result get_string() const noexcept; + /** + * Cast this element to a signed integer. + * + * @returns A signed 64-bit integer. + * Returns INCORRECT_TYPE if the JSON element is not an integer, or NUMBER_OUT_OF_RANGE + * if it does not fit in a signed 64-bit integer.
+ */ + inline simdjson_result get_int64() const noexcept; + /** + * Cast this element to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * Returns INCORRECT_TYPE if the JSON element is not an integer, or NUMBER_OUT_OF_RANGE + * if it is negative. + */ + inline simdjson_result get_uint64() const noexcept; + /** + * Cast this element to a double floating-point. + * + * @returns A double value. + * Returns INCORRECT_TYPE if the JSON element is not a number. + */ + inline simdjson_result get_double() const noexcept; + /** + * Cast this element to a bool. + * + * @returns A bool value. + * Returns INCORRECT_TYPE if the JSON element is not a boolean. + */ + inline simdjson_result get_bool() const noexcept; + + /** + * Whether this element is a json array. + * + * Equivalent to is(). + */ + inline bool is_array() const noexcept; + /** + * Whether this element is a json object. + * + * Equivalent to is(). + */ + inline bool is_object() const noexcept; + /** + * Whether this element is a json string. + * + * Equivalent to is() or is(). + */ + inline bool is_string() const noexcept; + /** + * Whether this element is a json number that fits in a signed 64-bit integer. + * + * Equivalent to is(). + */ + inline bool is_int64() const noexcept; + /** + * Whether this element is a json number that fits in an unsigned 64-bit integer. + * + * Equivalent to is(). + */ + inline bool is_uint64() const noexcept; + /** + * Whether this element is a json number that fits in a double. + * + * Equivalent to is(). + */ + inline bool is_double() const noexcept; + + /** + * Whether this element is a json number. + * + * Both integers and floating points will return true. + */ + inline bool is_number() const noexcept; + + /** + * Whether this element is a json `true` or `false`. + * + * Equivalent to is(). + */ + inline bool is_bool() const noexcept; + /** + * Whether this element is a json `null`.
+ */ + inline bool is_null() const noexcept; + + /** + * Tell whether the value can be cast to provided type (T). + * + * Supported types: + * - Boolean: bool + * - Number: double, uint64_t, int64_t + * - String: std::string_view, const char * + * - Array: dom::array + * - Object: dom::object + * + * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object + */ + template + simdjson_inline bool is() const noexcept; + + /** + * Get the value as the provided type (T). + * + * Supported types: + * - Boolean: bool + * - Number: double, uint64_t, int64_t + * - String: std::string_view, const char * + * - Array: dom::array + * - Object: dom::object + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array() or get_string() instead. + * + * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object + * + * @returns The value cast to the given type, or: + * INCORRECT_TYPE if the value cannot be cast to the given type. + */ + + template + inline simdjson_result get() const noexcept { + // Unless the simdjson library provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are Boolean (bool), numbers (double, uint64_t, int64_t), " + "strings (std::string_view, const char *), arrays (dom::array) and objects (dom::object). " + "We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + "get_object(), get_array() or get_string() instead of the get template."); + } + + /** + * Get the value as the provided type (T). 
+ * + * Supported types: + * - Boolean: bool + * - Number: double, uint64_t, int64_t + * - String: std::string_view, const char * + * - Array: dom::array + * - Object: dom::object + * + * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object + * + * @param value The variable to set to the value. May not be set if there is an error. + * + * @returns The error that occurred, or SUCCESS if there was no error. + */ + template + simdjson_warn_unused simdjson_inline error_code get(T &value) const noexcept; + + /** + * Get the value as the provided type (T), setting error if it's not the given type. + * + * Supported types: + * - Boolean: bool + * - Number: double, uint64_t, int64_t + * - String: std::string_view, const char * + * - Array: dom::array + * - Object: dom::object + * + * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object + * + * @param value The variable to set to the given type. value is undefined if there is an error. + * @param error The variable to store the error. error is set to SUCCESS if there is no error. + */ + template + inline void tie(T &value, error_code &error) && noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Read this element as a boolean. + * + * @return The boolean value + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a boolean. + */ + inline operator bool() const noexcept(false); + + /** + * Read this element as a null-terminated UTF-8 string. + * + * Be mindful that JSON allows strings to contain null characters. + * + * Does *not* convert other types to a string; requires that the JSON type of the element was + * an actual string. + * + * @return The string value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a string. + */ + inline explicit operator const char*() const noexcept(false); + + /** + * Read this element as a null-terminated UTF-8 string.
+ * + * Does *not* convert other types to a string; requires that the JSON type of the element was + * an actual string. + * + * @return The string value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a string. + */ + inline operator std::string_view() const noexcept(false); + + /** + * Read this element as an unsigned integer. + * + * @return The integer value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an integer + * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer does not fit in 64 bits or is negative + */ + inline operator uint64_t() const noexcept(false); + /** + * Read this element as a signed integer. + * + * @return The integer value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an integer + * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer does not fit in 64 bits + */ + inline operator int64_t() const noexcept(false); + /** + * Read this element as a double. + * + * @return The double value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a number + */ + inline operator double() const noexcept(false); + /** + * Read this element as a JSON array. + * + * @return The JSON array. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array + */ + inline operator array() const noexcept(false); + /** + * Read this element as a JSON object (key/value pairs). + * + * @return The JSON object. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an object + */ + inline operator object() const noexcept(false); + + /** + * Iterate over each element in this array. + * + * @return The beginning of the iteration. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array + */ + inline dom::array::iterator begin() const noexcept(false); + + /** + * Iterate over each element in this array. + * + * @return The end of the iteration.
+ * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array + */ + inline dom::array::iterator end() const noexcept(false); +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the value associated with the given key. + * + * The key will be matched against **unescaped** JSON: + * + * dom::parser parser; + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + * - INCORRECT_TYPE if this is not an object + */ + inline simdjson_result operator[](std::string_view key) const noexcept; + + /** + * Get the value associated with the given key. + * + * The key will be matched against **unescaped** JSON: + * + * dom::parser parser; + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + * - INCORRECT_TYPE if this is not an object + */ + inline simdjson_result operator[](const char *key) const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. 
+ * + * dom::parser parser; + * element doc = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded); + * doc.at_pointer("/foo/a/1") == 20 + * doc.at_pointer("/foo")["a"].at(1) == 20 + * doc.at_pointer("")["foo"]["a"].at(1) == 20 + * + * It is allowed for a key to be the empty string: + * + * dom::parser parser; + * object obj = parser.parse(R"({ "": { "a": [ 10, 20, 30 ] }})"_padded); + * obj.at_pointer("//a/1") == 20 + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(const std::string_view json_pointer) const noexcept; + +#ifndef SIMDJSON_DISABLE_DEPRECATED_API + /** + * + * Version 0.4 of simdjson used an incorrect interpretation of the JSON Pointer standard + * and allowed the following : + * + * dom::parser parser; + * element doc = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded); + * doc.at("foo/a/1") == 20 + * + * Though it is intuitive, it is not compliant with RFC 6901 + * https://tools.ietf.org/html/rfc6901 + * + * For standard compliance, use the at_pointer function instead. 
+ * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + [[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] + inline simdjson_result at(const std::string_view json_pointer) const noexcept; +#endif // SIMDJSON_DISABLE_DEPRECATED_API + + /** + * Get the value at the given index. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + inline simdjson_result at(size_t index) const noexcept; + + /** + * Get the value associated with the given key. + * + * The key will be matched against **unescaped** JSON: + * + * dom::parser parser; + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + */ + inline simdjson_result at_key(std::string_view key) const noexcept; + + /** + * Get the value associated with the given key in a case-insensitive manner. + * + * Note: The key will be matched against **unescaped** JSON. 
+ * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + */ + inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; + + /** + * operator< defines a total order for element allowing to use them in + * ordered C++ STL containers + * + * @return TRUE if the key appears before the other one in the tape + */ + inline bool operator<(const element &other) const noexcept; + + /** + * operator== allows to verify if two element values reference the + * same JSON item + * + * @return TRUE if the two values references the same JSON element + */ + inline bool operator==(const element &other) const noexcept; + + /** @private for debugging. Prints out the root element. */ + inline bool dump_raw_tape(std::ostream &out) const noexcept; + +private: + simdjson_inline element(const internal::tape_ref &tape) noexcept; + internal::tape_ref tape; + friend class document; + friend class object; + friend class array; + friend struct simdjson_result; + template + friend class simdjson::internal::string_builder; + +}; + +} // namespace dom + +/** The result of a JSON navigation that may fail. 
*/ +template<> +struct simdjson_result : public internal::simdjson_result_base { +public: + simdjson_inline simdjson_result() noexcept; ///< @private + simdjson_inline simdjson_result(dom::element &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + + simdjson_inline simdjson_result type() const noexcept; + template + simdjson_inline bool is() const noexcept; + template + simdjson_inline simdjson_result get() const noexcept; + template + simdjson_warn_unused simdjson_inline error_code get(T &value) const noexcept; + + simdjson_inline simdjson_result get_array() const noexcept; + simdjson_inline simdjson_result get_object() const noexcept; + simdjson_inline simdjson_result get_c_str() const noexcept; + simdjson_inline simdjson_result get_string_length() const noexcept; + simdjson_inline simdjson_result get_string() const noexcept; + simdjson_inline simdjson_result get_int64() const noexcept; + simdjson_inline simdjson_result get_uint64() const noexcept; + simdjson_inline simdjson_result get_double() const noexcept; + simdjson_inline simdjson_result get_bool() const noexcept; + + simdjson_inline bool is_array() const noexcept; + simdjson_inline bool is_object() const noexcept; + simdjson_inline bool is_string() const noexcept; + simdjson_inline bool is_int64() const noexcept; + simdjson_inline bool is_uint64() const noexcept; + simdjson_inline bool is_double() const noexcept; + simdjson_inline bool is_number() const noexcept; + simdjson_inline bool is_bool() const noexcept; + simdjson_inline bool is_null() const noexcept; + + simdjson_inline simdjson_result operator[](std::string_view key) const noexcept; + simdjson_inline simdjson_result operator[](const char *key) const noexcept; + simdjson_inline simdjson_result at_pointer(const std::string_view json_pointer) const noexcept; + [[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] + 
simdjson_inline simdjson_result at(const std::string_view json_pointer) const noexcept; + simdjson_inline simdjson_result at(size_t index) const noexcept; + simdjson_inline simdjson_result at_key(std::string_view key) const noexcept; + simdjson_inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator bool() const noexcept(false); + simdjson_inline explicit operator const char*() const noexcept(false); + simdjson_inline operator std::string_view() const noexcept(false); + simdjson_inline operator uint64_t() const noexcept(false); + simdjson_inline operator int64_t() const noexcept(false); + simdjson_inline operator double() const noexcept(false); + simdjson_inline operator dom::array() const noexcept(false); + simdjson_inline operator dom::object() const noexcept(false); + + simdjson_inline dom::array::iterator begin() const noexcept(false); + simdjson_inline dom::array::iterator end() const noexcept(false); +#endif // SIMDJSON_EXCEPTIONS +}; + +} // namespace simdjson + +#endif // SIMDJSON_DOM_ELEMENT_H +/* end file simdjson/dom/element.h */ +/* including simdjson/dom/object.h: #include "simdjson/dom/object.h" */ +/* begin file simdjson/dom/object.h */ +#ifndef SIMDJSON_DOM_OBJECT_H +#define SIMDJSON_DOM_OBJECT_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* skipped duplicate #include "simdjson/dom/element.h" */ +/* skipped duplicate #include "simdjson/internal/tape_ref.h" */ + +namespace simdjson { +namespace dom { + +/** + * JSON object.
+ */ +class object { +public: + /** Create a new, invalid object */ + simdjson_inline object() noexcept; + + class iterator { + public: + using value_type = const key_value_pair; + using difference_type = std::ptrdiff_t; + using pointer = void; + using reference = value_type; + using iterator_category = std::forward_iterator_tag; + + /** + * Get the actual key/value pair + */ + inline reference operator*() const noexcept; + /** + * Get the next key/value pair. + * + * Part of the std::iterator interface. + * + */ + inline iterator& operator++() noexcept; + /** + * Get the next key/value pair. + * + * Part of the std::iterator interface. + * + */ + inline iterator operator++(int) noexcept; + /** + * Check if these values come from the same place in the JSON. + * + * Part of the std::iterator interface. + */ + inline bool operator!=(const iterator& other) const noexcept; + inline bool operator==(const iterator& other) const noexcept; + + inline bool operator<(const iterator& other) const noexcept; + inline bool operator<=(const iterator& other) const noexcept; + inline bool operator>=(const iterator& other) const noexcept; + inline bool operator>(const iterator& other) const noexcept; + /** + * Get the key of this key/value pair. + */ + inline std::string_view key() const noexcept; + /** + * Get the length (in bytes) of the key in this key/value pair. + * You should expect this function to be faster than key().size(). + */ + inline uint32_t key_length() const noexcept; + /** + * Returns true if the key in this key/value pair is equal + * to the provided string_view. + */ + inline bool key_equals(std::string_view o) const noexcept; + /** + * Returns true if the key in this key/value pair is equal + * to the provided string_view in a case-insensitive manner. + * Case comparisons may only be handled correctly for ASCII strings. + */ + inline bool key_equals_case_insensitive(std::string_view o) const noexcept; + /** + * Get the key of this key/value pair. 
+ */ + inline const char *key_c_str() const noexcept; + /** + * Get the value of this key/value pair. + */ + inline element value() const noexcept; + + iterator() noexcept = default; + iterator(const iterator&) noexcept = default; + iterator& operator=(const iterator&) noexcept = default; + private: + simdjson_inline iterator(const internal::tape_ref &tape) noexcept; + + internal::tape_ref tape; + + friend class object; + }; + + /** + * Return the first key/value pair. + * + * Part of the std::iterable interface. + */ + inline iterator begin() const noexcept; + /** + * One past the last key/value pair. + * + * Part of the std::iterable interface. + */ + inline iterator end() const noexcept; + /** + * Get the size of the object (number of keys). + * It is a saturated value with a maximum of 0xFFFFFF: if the value + * is 0xFFFFFF then the size is 0xFFFFFF or greater. + */ + inline size_t size() const noexcept; + /** + * Get the value associated with the given key. + * + * The key will be matched against **unescaped** JSON: + * + * dom::parser parser; + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD + * + * This function has linear-time complexity: the keys are checked one by one. + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + * - INCORRECT_TYPE if this is not an object + */ + inline simdjson_result operator[](std::string_view key) const noexcept; + + /** + * Get the value associated with the given key. + * + * The key will be matched against **unescaped** JSON: + * + * dom::parser parser; + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD + * + * This function has linear-time complexity: the keys are checked one by one. 
+ * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + * - INCORRECT_TYPE if this is not an object + */ + inline simdjson_result operator[](const char *key) const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * dom::parser parser; + * object obj = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded); + * obj.at_pointer("/foo/a/1") == 20 + * obj.at_pointer("/foo")["a"].at(1) == 20 + * + * It is allowed for a key to be the empty string: + * + * dom::parser parser; + * object obj = parser.parse(R"({ "": { "a": [ 10, 20, 30 ] }})"_padded); + * obj.at_pointer("//a/1") == 20 + * obj.at_pointer("/")["a"].at(1) == 20 + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; + + /** + * Get the value associated with the given key. + * + * The key will be matched against **unescaped** JSON: + * + * dom::parser parser; + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD + * + * This function has linear-time complexity: the keys are checked one by one. + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + */ + inline simdjson_result at_key(std::string_view key) const noexcept; + + /** + * Get the value associated with the given key in a case-insensitive manner. 
+ * It is only guaranteed to work over ASCII inputs. + * + * Note: The key will be matched against **unescaped** JSON. + * + * This function has linear-time complexity: the keys are checked one by one. + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + */ + inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; + +private: + simdjson_inline object(const internal::tape_ref &tape) noexcept; + + internal::tape_ref tape; + + friend class element; + friend struct simdjson_result; + template + friend class simdjson::internal::string_builder; +}; + +/** + * Key/value pair in an object. + */ +class key_value_pair { +public: + /** key in the key-value pair **/ + std::string_view key; + /** value in the key-value pair **/ + element value; + +private: + simdjson_inline key_value_pair(std::string_view _key, element _value) noexcept; + friend class object; +}; + +} // namespace dom + +/** The result of a JSON conversion that may fail. 
*/ +template<> +struct simdjson_result : public internal::simdjson_result_base { +public: + simdjson_inline simdjson_result() noexcept; ///< @private + simdjson_inline simdjson_result(dom::object value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + + inline simdjson_result operator[](std::string_view key) const noexcept; + inline simdjson_result operator[](const char *key) const noexcept; + inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; + inline simdjson_result at_key(std::string_view key) const noexcept; + inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; + +#if SIMDJSON_EXCEPTIONS + inline dom::object::iterator begin() const noexcept(false); + inline dom::object::iterator end() const noexcept(false); + inline size_t size() const noexcept(false); +#endif // SIMDJSON_EXCEPTIONS +}; + +} // namespace simdjson + +#if defined(__cpp_lib_ranges) +#include + +namespace std { +namespace ranges { +template<> +inline constexpr bool enable_view = true; +#if SIMDJSON_EXCEPTIONS +template<> +inline constexpr bool enable_view> = true; +#endif // SIMDJSON_EXCEPTIONS +} // namespace ranges +} // namespace std +#endif // defined(__cpp_lib_ranges) + +#endif // SIMDJSON_DOM_OBJECT_H +/* end file simdjson/dom/object.h */ +/* skipped duplicate #include "simdjson/dom/parser.h" */ +/* including simdjson/dom/serialization.h: #include "simdjson/dom/serialization.h" */ +/* begin file simdjson/dom/serialization.h */ +#ifndef SIMDJSON_SERIALIZATION_H +#define SIMDJSON_SERIALIZATION_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* skipped duplicate #include "simdjson/dom/element.h" */ +/* skipped duplicate #include "simdjson/dom/object.h" */ + +#include + +namespace simdjson { + +/** + * The string_builder template and mini_formatter class + * are not part of our public API and are subject to change + * at any time! 
+ */ +namespace internal { + +template +class base_formatter { +public: + /** Add a comma **/ + simdjson_inline void comma(); + /** Start an array, prints [ **/ + simdjson_inline void start_array(); + /** End an array, prints ] **/ + simdjson_inline void end_array(); + /** Start an object, prints { **/ + simdjson_inline void start_object(); + /** End an object, prints } **/ + simdjson_inline void end_object(); + /** Prints a true **/ + simdjson_inline void true_atom(); + /** Prints a false **/ + simdjson_inline void false_atom(); + /** Prints a null **/ + simdjson_inline void null_atom(); + /** Prints a number **/ + simdjson_inline void number(int64_t x); + /** Prints a number **/ + simdjson_inline void number(uint64_t x); + /** Prints a number **/ + simdjson_inline void number(double x); + /** Prints a key (string + colon) **/ + simdjson_inline void key(std::string_view unescaped); + /** Prints a string. The string is escaped as needed. **/ + simdjson_inline void string(std::string_view unescaped); + /** Clears out the content. **/ + simdjson_inline void clear(); + /** + * Get access to the buffer, it is owned by the instance, but + * the user can make a copy. + **/ + simdjson_inline std::string_view str() const; + + /** Prints one character **/ + simdjson_inline void one_char(char c); + + simdjson_inline void call_print_newline() { + static_cast(this)->print_newline(); + } + + simdjson_inline void call_print_indents(size_t depth) { + static_cast(this)->print_indents(depth); + } + + simdjson_inline void call_print_space() { + static_cast(this)->print_space(); + } + +protected: + // implementation details (subject to change) + /** Backing buffer **/ + std::vector buffer{}; // not ideal! +}; + + +/** + * @private This is the class that we expect to use with the string_builder + * template. It tries to produce a compact version of the JSON element + * as quickly as possible.
+ */ +class mini_formatter : public base_formatter { +public: + simdjson_inline void print_newline(); + + simdjson_inline void print_indents(size_t depth); + + simdjson_inline void print_space(); +}; + +class pretty_formatter : public base_formatter { +public: + simdjson_inline void print_newline(); + + simdjson_inline void print_indents(size_t depth); + + simdjson_inline void print_space(); + +protected: + int indent_step = 4; +}; + +/** + * @private The string_builder template allows us to construct + * a string from a document element. It is parametrized + * by a "formatter" which handles the details. Thus + * the string_builder template could support both minification + * and prettification, and various other tradeoffs. + */ +template +class string_builder { +public: + /** Construct an initially empty builder, would print the empty string **/ + string_builder() = default; + /** Append an element to the builder (to be printed) **/ + inline void append(simdjson::dom::element value); + /** Append an array to the builder (to be printed) **/ + inline void append(simdjson::dom::array value); + /** Append an object to the builder (to be printed) **/ + inline void append(simdjson::dom::object value); + /** Reset the builder (so that it would print the empty string) **/ + simdjson_inline void clear(); + /** + * Get access to the string. The string_view is owned by the builder + * and it is invalid to use it after the string_builder has been + * destroyed. + * However you can make a copy of the string_view on memory that you + * own. + */ + simdjson_inline std::string_view str() const; + /** Append a key_value_pair to the builder (to be printed) **/ + simdjson_inline void append(simdjson::dom::key_value_pair value); +private: + formatter format{}; +}; + +} // internal + +namespace dom { + +/** + * Print JSON to an output stream. + * + * @param out The output stream. + * @param value The element. + * @throw if there is an error with the underlying output stream. 
simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::dom::element value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::dom::array value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. + * + * @param out The output stream. + * @param value The object. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::dom::object value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +} // namespace dom + +/** + * Converts JSON to a string. + * + * dom::parser parser; + * element doc = parser.parse(" [ 1 , 2 , 3 ] "_padded); + * cout << to_string(doc) << endl; // prints [1,2,3] + * + */ +template +std::string to_string(T x) { + // in C++, to_string is standard: http://www.cplusplus.com/reference/string/to_string/ + // Currently minify and to_string are identical but in the future, they may + // differ. + simdjson::internal::string_builder<> sb; + sb.append(x); + std::string_view answer = sb.str(); + return std::string(answer.data(), answer.size()); +} +#if SIMDJSON_EXCEPTIONS +template +std::string to_string(simdjson_result x) { + if (x.error()) { throw simdjson_error(x.error()); } + return to_string(x.value()); +} +#endif + +/** + * Minifies a JSON element or document, printing the smallest possible valid JSON. 
+ * + * dom::parser parser; + * element doc = parser.parse(" [ 1 , 2 , 3 ] "_padded); + * cout << minify(doc) << endl; // prints [1,2,3] + * + */ +template +std::string minify(T x) { + return to_string(x); +} + +#if SIMDJSON_EXCEPTIONS +template +std::string minify(simdjson_result x) { + if (x.error()) { throw simdjson_error(x.error()); } + return to_string(x.value()); +} +#endif + +/** + * Prettifies a JSON element or document, printing the valid JSON with indentation. + * + * dom::parser parser; + * element doc = parser.parse(" [ 1 , 2 , 3 ] "_padded); + * + * // Prints: + * // { + * // [ + * // 1, + * // 2, + * // 3 + * // ] + * // } + * cout << prettify(doc) << endl; + * + */ +template +std::string prettify(T x) { + simdjson::internal::string_builder sb; + sb.append(x); + std::string_view answer = sb.str(); + return std::string(answer.data(), answer.size()); +} + +#if SIMDJSON_EXCEPTIONS +template +std::string prettify(simdjson_result x) { + if (x.error()) { throw simdjson_error(x.error()); } + return to_string(x.value()); +} +#endif + +} // namespace simdjson + + +#endif +/* end file simdjson/dom/serialization.h */ + +// Inline functions +/* including simdjson/dom/array-inl.h: #include "simdjson/dom/array-inl.h" */ +/* begin file simdjson/dom/array-inl.h */ +#ifndef SIMDJSON_ARRAY_INL_H +#define SIMDJSON_ARRAY_INL_H + +#include + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* skipped duplicate #include "simdjson/dom/array.h" */ +/* skipped duplicate #include "simdjson/dom/element.h" */ +/* skipped duplicate #include "simdjson/error-inl.h" */ +/* including simdjson/internal/tape_ref-inl.h: #include "simdjson/internal/tape_ref-inl.h" */ +/* begin file simdjson/internal/tape_ref-inl.h */ +#ifndef SIMDJSON_TAPE_REF_INL_H +#define SIMDJSON_TAPE_REF_INL_H + +/* skipped duplicate #include "simdjson/dom/document.h" */ +/* skipped duplicate #include "simdjson/internal/tape_ref.h" */ +/* including simdjson/internal/tape_type.h: #include 
"simdjson/internal/tape_type.h" */ +/* begin file simdjson/internal/tape_type.h */ +#ifndef SIMDJSON_INTERNAL_TAPE_TYPE_H +#define SIMDJSON_INTERNAL_TAPE_TYPE_H + +namespace simdjson { +namespace internal { + +/** + * The possible types in the tape. + */ +enum class tape_type { + ROOT = 'r', + START_ARRAY = '[', + START_OBJECT = '{', + END_ARRAY = ']', + END_OBJECT = '}', + STRING = '"', + INT64 = 'l', + UINT64 = 'u', + DOUBLE = 'd', + TRUE_VALUE = 't', + FALSE_VALUE = 'f', + NULL_VALUE = 'n' +}; // enum class tape_type + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_TAPE_TYPE_H +/* end file simdjson/internal/tape_type.h */ + +#include + +namespace simdjson { +namespace internal { + +constexpr const uint64_t JSON_VALUE_MASK = 0x00FFFFFFFFFFFFFF; +constexpr const uint32_t JSON_COUNT_MASK = 0xFFFFFF; + +// +// tape_ref inline implementation +// +simdjson_inline tape_ref::tape_ref() noexcept : doc{nullptr}, json_index{0} {} +simdjson_inline tape_ref::tape_ref(const dom::document *_doc, size_t _json_index) noexcept : doc{_doc}, json_index{_json_index} {} + + +simdjson_inline bool tape_ref::is_document_root() const noexcept { + return json_index == 1; // should we ever change the structure of the tape, this should get updated. +} +simdjson_inline bool tape_ref::usable() const noexcept { + return doc != nullptr; // when the document pointer is null, this tape_ref is uninitialized (should not be accessed). +} +// Some value types have a specific on-tape word value. It can be faster +// to check the type by doing a word-to-word comparison instead of extracting the +// most significant 8 bits. 
+ +simdjson_inline bool tape_ref::is_double() const noexcept { + constexpr uint64_t tape_double = uint64_t(tape_type::DOUBLE)<<56; + return doc->tape[json_index] == tape_double; +} +simdjson_inline bool tape_ref::is_int64() const noexcept { + constexpr uint64_t tape_int64 = uint64_t(tape_type::INT64)<<56; + return doc->tape[json_index] == tape_int64; +} +simdjson_inline bool tape_ref::is_uint64() const noexcept { + constexpr uint64_t tape_uint64 = uint64_t(tape_type::UINT64)<<56; + return doc->tape[json_index] == tape_uint64; +} +simdjson_inline bool tape_ref::is_false() const noexcept { + constexpr uint64_t tape_false = uint64_t(tape_type::FALSE_VALUE)<<56; + return doc->tape[json_index] == tape_false; +} +simdjson_inline bool tape_ref::is_true() const noexcept { + constexpr uint64_t tape_true = uint64_t(tape_type::TRUE_VALUE)<<56; + return doc->tape[json_index] == tape_true; +} +simdjson_inline bool tape_ref::is_null_on_tape() const noexcept { + constexpr uint64_t tape_null = uint64_t(tape_type::NULL_VALUE)<<56; + return doc->tape[json_index] == tape_null; +} + +/** + * Returns the tape index that follows this element: the value of + * matching_brace_index() for START_ARRAY / START_OBJECT, json_index + 2 for + * UINT64 / INT64 / DOUBLE, and json_index + 1 for every other tape type. + */ +inline size_t tape_ref::after_element() const noexcept { + switch (tape_ref_type()) { + case tape_type::START_ARRAY: + case tape_type::START_OBJECT: + return matching_brace_index(); + case tape_type::UINT64: + case tape_type::INT64: + case tape_type::DOUBLE: + return json_index + 2; + default: + return json_index + 1; + } +} +/** Returns the tape type stored in the most significant 8 bits of the current tape word. */ +simdjson_inline tape_type tape_ref::tape_ref_type() const noexcept { + return static_cast(doc->tape[json_index] >> 56); +} +/** Returns the current tape word masked with JSON_VALUE_MASK (the low 56 bits). */ +simdjson_inline uint64_t internal::tape_ref::tape_value() const noexcept { + return doc->tape[json_index] & internal::JSON_VALUE_MASK; +} +/** Returns the low 32 bits of the current tape word. */ +simdjson_inline uint32_t internal::tape_ref::matching_brace_index() const noexcept { + return uint32_t(doc->tape[json_index]); +} +/** Returns bits 32..55 of the current tape word (shifted down by 32 and masked with JSON_COUNT_MASK). */ +simdjson_inline uint32_t internal::tape_ref::scope_count() const noexcept { + return uint32_t((doc->tape[json_index] >> 32) & internal::JSON_COUNT_MASK); +} + +template +simdjson_inline T
tape_ref::next_tape_value() const noexcept { + static_assert(sizeof(T) == sizeof(uint64_t), "next_tape_value() template parameter must be 64-bit"); + // Though the following is tempting... + // return *reinterpret_cast(&doc->tape[json_index + 1]); + // It is not generally safe. It is safer, and often faster to rely + // on memcpy. Yes, it is uglier, but it is also encapsulated. + T x; + std::memcpy(&x,&doc->tape[json_index + 1],sizeof(uint64_t)); + return x; +} + +/** Reads the uint32_t stored in string_buf at offset tape_value() and returns it as the string length. */ +simdjson_inline uint32_t internal::tape_ref::get_string_length() const noexcept { + size_t string_buf_index = size_t(tape_value()); + uint32_t len; + std::memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len)); + return len; +} + +/** Returns a pointer into string_buf just past the uint32_t length that sits at offset tape_value(). */ +simdjson_inline const char * internal::tape_ref::get_c_str() const noexcept { + size_t string_buf_index = size_t(tape_value()); + return reinterpret_cast(&doc->string_buf[string_buf_index + sizeof(uint32_t)]); +} + +/** Builds a string_view from get_c_str() and get_string_length(); it points into the document's string_buf. */ +inline std::string_view internal::tape_ref::get_string_view() const noexcept { + return std::string_view( + get_c_str(), + get_string_length() + ); +} + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_TAPE_REF_INL_H +/* end file simdjson/internal/tape_ref-inl.h */ + +#include + +namespace simdjson { + +// +// simdjson_result inline implementation +// +simdjson_inline simdjson_result::simdjson_result() noexcept + : internal::simdjson_result_base() {} +simdjson_inline simdjson_result::simdjson_result(dom::array value) noexcept + : internal::simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : internal::simdjson_result_base(error) {} + +#if SIMDJSON_EXCEPTIONS + +inline dom::array::iterator simdjson_result::begin() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.begin(); +} +inline dom::array::iterator simdjson_result::end() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.end(); +}
+inline size_t simdjson_result::size() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.size(); +} + +#endif // SIMDJSON_EXCEPTIONS + +inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) const noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +inline simdjson_result simdjson_result::at(size_t index) const noexcept { + if (error()) { return error(); } + return first.at(index); +} + +namespace dom { + +// +// array inline implementation +// +simdjson_inline array::array() noexcept : tape{} {} +simdjson_inline array::array(const internal::tape_ref &_tape) noexcept : tape{_tape} {} +inline array::iterator array::begin() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + return internal::tape_ref(tape.doc, tape.json_index + 1); +} +inline array::iterator array::end() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + return internal::tape_ref(tape.doc, tape.after_element() - 1); +} +inline size_t array::size() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + return tape.scope_count(); +} +inline size_t array::number_of_slots() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + return tape.matching_brace_index() - tape.json_index; +} +inline simdjson_result array::at_pointer(std::string_view json_pointer) const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + if(json_pointer.empty()) { // an empty string means that we return the current node + return element(this->tape); // copy the current node + } else if(json_pointer[0] != '/') { // otherwise there is an error + return INVALID_JSON_POINTER; + } + json_pointer = json_pointer.substr(1); + 
// - means "the append position" or "the element after the end of the array" + // We don't support this, because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + + // Get the child + auto child = array(tape).at(array_index); + // If there is an error, it ends here + if(child.error()) { + return child; + } + // If there is a /, we're not done yet, call recursively. 
+ if (i < json_pointer.length()) { + child = child.at_pointer(json_pointer.substr(i)); + } + return child; +} + +inline simdjson_result array::at(size_t index) const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + size_t i=0; + for (auto element : *this) { + if (i == index) { return element; } + i++; + } + return INDEX_OUT_OF_BOUNDS; +} + +// +// array::iterator inline implementation +// +simdjson_inline array::iterator::iterator(const internal::tape_ref &_tape) noexcept : tape{_tape} { } +inline element array::iterator::operator*() const noexcept { + return element(tape); +} +inline array::iterator& array::iterator::operator++() noexcept { + tape.json_index = tape.after_element(); + return *this; +} +inline array::iterator array::iterator::operator++(int) noexcept { + array::iterator out = *this; + ++*this; + return out; +} +inline bool array::iterator::operator!=(const array::iterator& other) const noexcept { + return tape.json_index != other.tape.json_index; +} +inline bool array::iterator::operator==(const array::iterator& other) const noexcept { + return tape.json_index == other.tape.json_index; +} +inline bool array::iterator::operator<(const array::iterator& other) const noexcept { + return tape.json_index < other.tape.json_index; +} +inline bool array::iterator::operator<=(const array::iterator& other) const noexcept { + return tape.json_index <= other.tape.json_index; +} +inline bool array::iterator::operator>=(const array::iterator& other) const noexcept { + return tape.json_index >= other.tape.json_index; +} +inline bool array::iterator::operator>(const array::iterator& other) const noexcept { + return tape.json_index > other.tape.json_index; +} + +} // namespace dom + + +} // namespace simdjson + +/* including simdjson/dom/element-inl.h: #include "simdjson/dom/element-inl.h" */ +/* begin file simdjson/dom/element-inl.h */ +#ifndef SIMDJSON_ELEMENT_INL_H +#define SIMDJSON_ELEMENT_INL_H + +/* 
skipped duplicate #include "simdjson/dom/base.h" */ +/* skipped duplicate #include "simdjson/dom/element.h" */ +/* skipped duplicate #include "simdjson/dom/document.h" */ +/* skipped duplicate #include "simdjson/dom/object.h" */ +/* skipped duplicate #include "simdjson/internal/tape_type.h" */ + +/* including simdjson/dom/object-inl.h: #include "simdjson/dom/object-inl.h" */ +/* begin file simdjson/dom/object-inl.h */ +#ifndef SIMDJSON_OBJECT_INL_H +#define SIMDJSON_OBJECT_INL_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* skipped duplicate #include "simdjson/dom/object.h" */ +/* skipped duplicate #include "simdjson/dom/document.h" */ + +/* skipped duplicate #include "simdjson/dom/element-inl.h" */ +/* skipped duplicate #include "simdjson/error-inl.h" */ + +#include + +namespace simdjson { + +// +// simdjson_result inline implementation +// +simdjson_inline simdjson_result::simdjson_result() noexcept + : internal::simdjson_result_base() {} +simdjson_inline simdjson_result::simdjson_result(dom::object value) noexcept + : internal::simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : internal::simdjson_result_base(error) {} + +inline simdjson_result simdjson_result::operator[](std::string_view key) const noexcept { + if (error()) { return error(); } + return first[key]; +} +inline simdjson_result simdjson_result::operator[](const char *key) const noexcept { + if (error()) { return error(); } + return first[key]; +} +inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) const noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +inline simdjson_result simdjson_result::at_key(std::string_view key) const noexcept { + if (error()) { return error(); } + return first.at_key(key); +} +inline simdjson_result simdjson_result::at_key_case_insensitive(std::string_view key) const noexcept { + if (error()) { return error(); } + 
return first.at_key_case_insensitive(key); +} + +#if SIMDJSON_EXCEPTIONS + +inline dom::object::iterator simdjson_result::begin() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.begin(); +} +inline dom::object::iterator simdjson_result::end() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.end(); +} +inline size_t simdjson_result::size() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.size(); +} + +#endif // SIMDJSON_EXCEPTIONS + +namespace dom { + +// +// object inline implementation +// +simdjson_inline object::object() noexcept : tape{} {} +simdjson_inline object::object(const internal::tape_ref &_tape) noexcept : tape{_tape} { } +inline object::iterator object::begin() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + return internal::tape_ref(tape.doc, tape.json_index + 1); +} +inline object::iterator object::end() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + return internal::tape_ref(tape.doc, tape.after_element() - 1); +} +inline size_t object::size() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + return tape.scope_count(); +} + +inline simdjson_result object::operator[](std::string_view key) const noexcept { + return at_key(key); +} +inline simdjson_result object::operator[](const char *key) const noexcept { + return at_key(key); +} +inline simdjson_result object::at_pointer(std::string_view json_pointer) const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + if(json_pointer.empty()) { // an empty string means that we return the current node + return element(this->tape); // copy the current node + } else if(json_pointer[0] != '/') { // otherwise there is an error + 
return INVALID_JSON_POINTER; + } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = at_key(unescaped); + } else { + child = at_key(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; +} + +inline simdjson_result object::at_key(std::string_view key) const noexcept { + iterator end_field = end(); + for (iterator field = begin(); field != end_field; ++field) { + if (field.key_equals(key)) { + return field.value(); + } + } + return NO_SUCH_FIELD; +} +// In case you wonder why we need this, please see +// https://github.com/simdjson/simdjson/issues/323 +// People do seek keys in a case-insensitive manner. 
+inline simdjson_result object::at_key_case_insensitive(std::string_view key) const noexcept { + iterator end_field = end(); + for (iterator field = begin(); field != end_field; ++field) { + if (field.key_equals_case_insensitive(key)) { + return field.value(); + } + } + return NO_SUCH_FIELD; +} + +// +// object::iterator inline implementation +// +simdjson_inline object::iterator::iterator(const internal::tape_ref &_tape) noexcept : tape{_tape} { } +inline const key_value_pair object::iterator::operator*() const noexcept { + return key_value_pair(key(), value()); +} +inline bool object::iterator::operator!=(const object::iterator& other) const noexcept { + return tape.json_index != other.tape.json_index; +} +inline bool object::iterator::operator==(const object::iterator& other) const noexcept { + return tape.json_index == other.tape.json_index; +} +inline bool object::iterator::operator<(const object::iterator& other) const noexcept { + return tape.json_index < other.tape.json_index; +} +inline bool object::iterator::operator<=(const object::iterator& other) const noexcept { + return tape.json_index <= other.tape.json_index; +} +inline bool object::iterator::operator>=(const object::iterator& other) const noexcept { + return tape.json_index >= other.tape.json_index; +} +inline bool object::iterator::operator>(const object::iterator& other) const noexcept { + return tape.json_index > other.tape.json_index; +} +inline object::iterator& object::iterator::operator++() noexcept { + tape.json_index++; + tape.json_index = tape.after_element(); + return *this; +} +inline object::iterator object::iterator::operator++(int) noexcept { + object::iterator out = *this; + ++*this; + return out; +} +inline std::string_view object::iterator::key() const noexcept { + return tape.get_string_view(); +} +inline uint32_t object::iterator::key_length() const noexcept { + return tape.get_string_length(); +} +inline const char* object::iterator::key_c_str() const noexcept { + return 
reinterpret_cast(&tape.doc->string_buf[size_t(tape.tape_value()) + sizeof(uint32_t)]); +} +inline element object::iterator::value() const noexcept { + return element(internal::tape_ref(tape.doc, tape.json_index + 1)); +} + +/** + * Design notes: + * Instead of constructing a string_view and then comparing it with a + * user-provided strings, it is probably more performant to have dedicated + * functions taking as a parameter the string we want to compare against + * and return true when they are equal. That avoids the creation of a temporary + * std::string_view. Though it is possible for the compiler to avoid entirely + * any overhead due to string_view, relying too much on compiler magic is + * problematic: compiler magic sometimes fail, and then what do you do? + * Also, enticing users to rely on high-performance function is probably better + * on the long run. + */ + +inline bool object::iterator::key_equals(std::string_view o) const noexcept { + // We use the fact that the key length can be computed quickly + // without access to the string buffer. + const uint32_t len = key_length(); + if(o.size() == len) { + // We avoid construction of a temporary string_view instance. + return (memcmp(o.data(), key_c_str(), len) == 0); + } + return false; +} + +inline bool object::iterator::key_equals_case_insensitive(std::string_view o) const noexcept { + // We use the fact that the key length can be computed quickly + // without access to the string buffer. + const uint32_t len = key_length(); + if(o.size() == len) { + // See For case-insensitive string comparisons, avoid char-by-char functions + // https://lemire.me/blog/2020/04/30/for-case-insensitive-string-comparisons-avoid-char-by-char-functions/ + // Note that it might be worth rolling our own strncasecmp function, with vectorization. 
+ return (simdjson_strncasecmp(o.data(), key_c_str(), len) == 0); + } + return false; +} +// +// key_value_pair inline implementation +// +inline key_value_pair::key_value_pair(std::string_view _key, element _value) noexcept : + key(_key), value(_value) {} + +} // namespace dom + +} // namespace simdjson + +#if defined(__cpp_lib_ranges) +static_assert(std::ranges::view); +static_assert(std::ranges::sized_range); +#if SIMDJSON_EXCEPTIONS +static_assert(std::ranges::view>); +static_assert(std::ranges::sized_range>); +#endif // SIMDJSON_EXCEPTIONS +#endif // defined(__cpp_lib_ranges) + +#endif // SIMDJSON_OBJECT_INL_H +/* end file simdjson/dom/object-inl.h */ +/* skipped duplicate #include "simdjson/error-inl.h" */ + +#include +#include + +namespace simdjson { + +// +// simdjson_result inline implementation +// +simdjson_inline simdjson_result::simdjson_result() noexcept + : internal::simdjson_result_base() {} +simdjson_inline simdjson_result::simdjson_result(dom::element &&value) noexcept + : internal::simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : internal::simdjson_result_base(error) {} +inline simdjson_result simdjson_result::type() const noexcept { + if (error()) { return error(); } + return first.type(); +} + +template +simdjson_inline bool simdjson_result::is() const noexcept { + return !error() && first.is(); +} +template +simdjson_inline simdjson_result simdjson_result::get() const noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &value) const noexcept { + if (error()) { return error(); } + return first.get(value); +} + +simdjson_inline simdjson_result simdjson_result::get_array() const noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() const noexcept { + if (error()) { return error(); } + 
return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_c_str() const noexcept { + if (error()) { return error(); } + return first.get_c_str(); +} +simdjson_inline simdjson_result simdjson_result::get_string_length() const noexcept { + if (error()) { return error(); } + return first.get_string_length(); +} +simdjson_inline simdjson_result simdjson_result::get_string() const noexcept { + if (error()) { return error(); } + return first.get_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() const noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() const noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_double() const noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() const noexcept { + if (error()) { return error(); } + return first.get_bool(); +} + +simdjson_inline bool simdjson_result::is_array() const noexcept { + return !error() && first.is_array(); +} +simdjson_inline bool simdjson_result::is_object() const noexcept { + return !error() && first.is_object(); +} +simdjson_inline bool simdjson_result::is_string() const noexcept { + return !error() && first.is_string(); +} +simdjson_inline bool simdjson_result::is_int64() const noexcept { + return !error() && first.is_int64(); +} +simdjson_inline bool simdjson_result::is_uint64() const noexcept { + return !error() && first.is_uint64(); +} +simdjson_inline bool simdjson_result::is_double() const noexcept { + return !error() && first.is_double(); +} +simdjson_inline bool simdjson_result::is_number() const noexcept { + return !error() && first.is_number(); +} +simdjson_inline bool simdjson_result::is_bool() const noexcept { + return !error() && first.is_bool(); +} + +simdjson_inline bool simdjson_result::is_null() 
const noexcept { + return !error() && first.is_null(); +} + +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) const noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) const noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::at_pointer(const std::string_view json_pointer) const noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +[[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] +simdjson_inline simdjson_result simdjson_result::at(const std::string_view json_pointer) const noexcept { +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_DEPRECATED_WARNING + if (error()) { return error(); } + return first.at(json_pointer); +SIMDJSON_POP_DISABLE_WARNINGS +} +#endif // SIMDJSON_DISABLE_DEPRECATED_API +simdjson_inline simdjson_result simdjson_result::at(size_t index) const noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::at_key(std::string_view key) const noexcept { + if (error()) { return error(); } + return first.at_key(key); +} +simdjson_inline simdjson_result simdjson_result::at_key_case_insensitive(std::string_view key) const noexcept { + if (error()) { return error(); } + return first.at_key_case_insensitive(key); +} + +#if SIMDJSON_EXCEPTIONS + +simdjson_inline simdjson_result::operator bool() const noexcept(false) { + return get(); +} +simdjson_inline simdjson_result::operator const char *() const noexcept(false) { + return get(); +} +simdjson_inline simdjson_result::operator std::string_view() const noexcept(false) { + return get(); +} +simdjson_inline simdjson_result::operator uint64_t() const noexcept(false) { + return get(); +} 
+simdjson_inline simdjson_result::operator int64_t() const noexcept(false) { + return get(); +} +simdjson_inline simdjson_result::operator double() const noexcept(false) { + return get(); +} +simdjson_inline simdjson_result::operator dom::array() const noexcept(false) { + return get(); +} +simdjson_inline simdjson_result::operator dom::object() const noexcept(false) { + return get(); +} + +simdjson_inline dom::array::iterator simdjson_result::begin() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.begin(); +} +simdjson_inline dom::array::iterator simdjson_result::end() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.end(); +} + +#endif // SIMDJSON_EXCEPTIONS + +namespace dom { + +// +// element inline implementation +// +simdjson_inline element::element() noexcept : tape{} {} +simdjson_inline element::element(const internal::tape_ref &_tape) noexcept : tape{_tape} { } + +inline element_type element::type() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + auto tape_type = tape.tape_ref_type(); + return tape_type == internal::tape_type::FALSE_VALUE ? 
element_type::BOOL : static_cast(tape_type); +} + +inline simdjson_result element::get_bool() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + if(tape.is_true()) { + return true; + } else if(tape.is_false()) { + return false; + } + return INCORRECT_TYPE; +} +inline simdjson_result element::get_c_str() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + switch (tape.tape_ref_type()) { + case internal::tape_type::STRING: { + return tape.get_c_str(); + } + default: + return INCORRECT_TYPE; + } +} +inline simdjson_result element::get_string_length() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + switch (tape.tape_ref_type()) { + case internal::tape_type::STRING: { + return tape.get_string_length(); + } + default: + return INCORRECT_TYPE; + } +} +inline simdjson_result element::get_string() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + switch (tape.tape_ref_type()) { + case internal::tape_type::STRING: + return tape.get_string_view(); + default: + return INCORRECT_TYPE; + } +} +inline simdjson_result element::get_uint64() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + if(simdjson_unlikely(!tape.is_uint64())) { // branch rarely taken + if(tape.is_int64()) { + int64_t result = tape.next_tape_value(); + if (result < 0) { + return NUMBER_OUT_OF_RANGE; + } + return uint64_t(result); + } + return INCORRECT_TYPE; + } + return tape.next_tape_value(); +} +inline simdjson_result element::get_int64() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + if(simdjson_unlikely(!tape.is_int64())) { // branch rarely taken + if(tape.is_uint64()) { + uint64_t result = 
tape.next_tape_value(); + // Wrapping max in parens to handle Windows issue: https://stackoverflow.com/questions/11544073/how-do-i-deal-with-the-max-macro-in-windows-h-colliding-with-max-in-std + if (result > uint64_t((std::numeric_limits::max)())) { + return NUMBER_OUT_OF_RANGE; + } + return static_cast(result); + } + return INCORRECT_TYPE; + } + return tape.next_tape_value(); +} +inline simdjson_result element::get_double() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + // Performance considerations: + // 1. Querying tape_ref_type() implies doing a shift, it is fast to just do a straight + // comparison. + // 2. Using a switch-case relies on the compiler guessing what kind of code generation + // we want... But the compiler cannot know that we expect the type to be "double" + // most of the time. + // We can expect get to refer to a double type almost all the time. + // It is important to craft the code accordingly so that the compiler can use this + // information. (This could also be solved with profile-guided optimization.) 
+ if(simdjson_unlikely(!tape.is_double())) { // branch rarely taken + if(tape.is_uint64()) { + return double(tape.next_tape_value()); + } else if(tape.is_int64()) { + return double(tape.next_tape_value()); + } + return INCORRECT_TYPE; + } + // this is common: + return tape.next_tape_value(); +} +inline simdjson_result element::get_array() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + switch (tape.tape_ref_type()) { + case internal::tape_type::START_ARRAY: + return array(tape); + default: + return INCORRECT_TYPE; + } +} +inline simdjson_result element::get_object() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + switch (tape.tape_ref_type()) { + case internal::tape_type::START_OBJECT: + return object(tape); + default: + return INCORRECT_TYPE; + } +} + +template +simdjson_warn_unused simdjson_inline error_code element::get(T &value) const noexcept { + return get().get(value); +} +// An element-specific version prevents recursion with simdjson_result::get(value) +template<> +simdjson_warn_unused simdjson_inline error_code element::get(element &value) const noexcept { + value = element(tape); + return SUCCESS; +} +template +inline void element::tie(T &value, error_code &error) && noexcept { + error = get(value); +} + +template +simdjson_inline bool element::is() const noexcept { + auto result = get(); + return !result.error(); +} + +template<> inline simdjson_result element::get() const noexcept { return get_array(); } +template<> inline simdjson_result element::get() const noexcept { return get_object(); } +template<> inline simdjson_result element::get() const noexcept { return get_c_str(); } +template<> inline simdjson_result element::get() const noexcept { return get_string(); } +template<> inline simdjson_result element::get() const noexcept { return get_int64(); } +template<> inline simdjson_result element::get() const 
noexcept { return get_uint64(); } +template<> inline simdjson_result element::get() const noexcept { return get_double(); } +template<> inline simdjson_result element::get() const noexcept { return get_bool(); } + +inline bool element::is_array() const noexcept { return is(); } +inline bool element::is_object() const noexcept { return is(); } +inline bool element::is_string() const noexcept { return is(); } +inline bool element::is_int64() const noexcept { return is(); } +inline bool element::is_uint64() const noexcept { return is(); } +inline bool element::is_double() const noexcept { return is(); } +inline bool element::is_bool() const noexcept { return is(); } +inline bool element::is_number() const noexcept { return is_int64() || is_uint64() || is_double(); } + +inline bool element::is_null() const noexcept { + return tape.is_null_on_tape(); +} + +#if SIMDJSON_EXCEPTIONS + +inline element::operator bool() const noexcept(false) { return get(); } +inline element::operator const char*() const noexcept(false) { return get(); } +inline element::operator std::string_view() const noexcept(false) { return get(); } +inline element::operator uint64_t() const noexcept(false) { return get(); } +inline element::operator int64_t() const noexcept(false) { return get(); } +inline element::operator double() const noexcept(false) { return get(); } +inline element::operator array() const noexcept(false) { return get(); } +inline element::operator object() const noexcept(false) { return get(); } + +inline array::iterator element::begin() const noexcept(false) { + return get().begin(); +} +inline array::iterator element::end() const noexcept(false) { + return get().end(); +} + +#endif // SIMDJSON_EXCEPTIONS + +inline simdjson_result element::operator[](std::string_view key) const noexcept { + return at_key(key); +} +inline simdjson_result element::operator[](const char *key) const noexcept { + return at_key(key); +} + +inline bool is_pointer_well_formed(std::string_view 
json_pointer) noexcept { + if (simdjson_unlikely(json_pointer[0] != '/')) { + return false; + } + size_t escape = json_pointer.find('~'); + if (escape == std::string_view::npos) { + return true; + } + if (escape == json_pointer.size() - 1) { + return false; + } + if (json_pointer[escape + 1] != '0' && json_pointer[escape + 1] != '1') { + return false; + } + return true; +} + +inline simdjson_result element::at_pointer(std::string_view json_pointer) const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + switch (tape.tape_ref_type()) { + case internal::tape_type::START_OBJECT: + return object(tape).at_pointer(json_pointer); + case internal::tape_type::START_ARRAY: + return array(tape).at_pointer(json_pointer); + default: { + if (!json_pointer.empty()) { // a non-empty string can be invalid, or accessing a primitive (issue 2154) + if (is_pointer_well_formed(json_pointer)) { + return NO_SUCH_FIELD; + } + return INVALID_JSON_POINTER; + } + // an empty string means that we return the current node + dom::element copy(*this); + return simdjson_result(std::move(copy)); + } + } +} +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +[[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] +inline simdjson_result element::at(std::string_view json_pointer) const noexcept { + // version 0.4 of simdjson allowed non-compliant pointers + auto std_pointer = (json_pointer.empty() ? 
"" : "/") + std::string(json_pointer.begin(), json_pointer.end()); + return at_pointer(std_pointer); +} +#endif // SIMDJSON_DISABLE_DEPRECATED_API + +inline simdjson_result element::at(size_t index) const noexcept { + return get().at(index); +} +inline simdjson_result element::at_key(std::string_view key) const noexcept { + return get().at_key(key); +} +inline simdjson_result element::at_key_case_insensitive(std::string_view key) const noexcept { + return get().at_key_case_insensitive(key); +} +inline bool element::operator<(const element &other) const noexcept { + return tape.json_index < other.tape.json_index; +} +inline bool element::operator==(const element &other) const noexcept { + return tape.json_index == other.tape.json_index; +} + +inline bool element::dump_raw_tape(std::ostream &out) const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + return tape.doc->dump_raw_tape(out); +} + + +inline std::ostream& operator<<(std::ostream& out, element_type type) { + switch (type) { + case element_type::ARRAY: + return out << "array"; + case element_type::OBJECT: + return out << "object"; + case element_type::INT64: + return out << "int64_t"; + case element_type::UINT64: + return out << "uint64_t"; + case element_type::DOUBLE: + return out << "double"; + case element_type::STRING: + return out << "string"; + case element_type::BOOL: + return out << "bool"; + case element_type::NULL_VALUE: + return out << "null"; + default: + return out << "unexpected content!!!"; // abort() usage is forbidden in the library + } +} + +} // namespace dom + +} // namespace simdjson + +#endif // SIMDJSON_ELEMENT_INL_H +/* end file simdjson/dom/element-inl.h */ + +#if defined(__cpp_lib_ranges) +static_assert(std::ranges::view); +static_assert(std::ranges::sized_range); +#if SIMDJSON_EXCEPTIONS +static_assert(std::ranges::view>); +static_assert(std::ranges::sized_range>); +#endif // SIMDJSON_EXCEPTIONS +#endif // 
defined(__cpp_lib_ranges) + +#endif // SIMDJSON_ARRAY_INL_H +/* end file simdjson/dom/array-inl.h */ +/* including simdjson/dom/document_stream-inl.h: #include "simdjson/dom/document_stream-inl.h" */ +/* begin file simdjson/dom/document_stream-inl.h */ +#ifndef SIMDJSON_DOCUMENT_STREAM_INL_H +#define SIMDJSON_DOCUMENT_STREAM_INL_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* skipped duplicate #include "simdjson/dom/document_stream.h" */ +/* skipped duplicate #include "simdjson/dom/element-inl.h" */ +/* including simdjson/dom/parser-inl.h: #include "simdjson/dom/parser-inl.h" */ +/* begin file simdjson/dom/parser-inl.h */ +#ifndef SIMDJSON_PARSER_INL_H +#define SIMDJSON_PARSER_INL_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* skipped duplicate #include "simdjson/dom/document_stream.h" */ +/* skipped duplicate #include "simdjson/implementation.h" */ +/* skipped duplicate #include "simdjson/internal/dom_parser_implementation.h" */ + +/* skipped duplicate #include "simdjson/error-inl.h" */ +/* skipped duplicate #include "simdjson/padded_string-inl.h" */ +/* skipped duplicate #include "simdjson/dom/document_stream-inl.h" */ +/* skipped duplicate #include "simdjson/dom/element-inl.h" */ + +#include +#include /* memcmp */ + +namespace simdjson { +namespace dom { + +// +// parser inline implementation +// +simdjson_inline parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity}, + loaded_bytes(nullptr) { +} +simdjson_inline parser::parser(parser &&other) noexcept = default; +simdjson_inline parser &parser::operator=(parser &&other) noexcept = default; + +inline bool parser::is_valid() const noexcept { return valid; } +inline int parser::get_error_code() const noexcept { return error; } +inline std::string parser::get_error_message() const noexcept { return error_message(error); } + +inline bool parser::dump_raw_tape(std::ostream &os) const noexcept { + return valid ? 
doc.dump_raw_tape(os) : false; +} + +inline simdjson_result parser::read_file(const std::string &path) noexcept { + // Open the file + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + std::FILE *fp = std::fopen(path.c_str(), "rb"); + SIMDJSON_POP_DISABLE_WARNINGS + + if (fp == nullptr) { + return IO_ERROR; + } + + // Get the file size + int ret; +#if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS + ret = _fseeki64(fp, 0, SEEK_END); +#else + ret = std::fseek(fp, 0, SEEK_END); +#endif // _WIN64 + if(ret < 0) { + std::fclose(fp); + return IO_ERROR; + } +#if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS + __int64 len = _ftelli64(fp); + if(len == -1L) { + std::fclose(fp); + return IO_ERROR; + } +#else + long len = std::ftell(fp); + if((len < 0) || (len == LONG_MAX)) { + std::fclose(fp); + return IO_ERROR; + } +#endif + + // Make sure we have enough capacity to load the file + if (_loaded_bytes_capacity < size_t(len)) { + loaded_bytes.reset( internal::allocate_padded_buffer(len) ); + if (!loaded_bytes) { + std::fclose(fp); + return MEMALLOC; + } + _loaded_bytes_capacity = len; + } + + // Read the string + std::rewind(fp); + size_t bytes_read = std::fread(loaded_bytes.get(), 1, len, fp); + if (std::fclose(fp) != 0 || bytes_read != size_t(len)) { + return IO_ERROR; + } + + return bytes_read; +} + +inline simdjson_result parser::load(const std::string &path) & noexcept { + return load_into_document(doc, path); +} + +inline simdjson_result parser::load_into_document(document& provided_doc, const std::string &path) & noexcept { + size_t len; + auto _error = read_file(path).get(len); + if (_error) { return _error; } + return parse_into_document(provided_doc, loaded_bytes.get(), len, false); +} + +inline simdjson_result parser::load_many(const std::string &path, size_t batch_size) noexcept { + size_t len; + auto _error = read_file(path).get(len); + if (_error) { return _error; } + 
if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + return document_stream(*this, reinterpret_cast(loaded_bytes.get()), len, batch_size); +} + +inline simdjson_result parser::parse_into_document(document& provided_doc, const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept { + // Important: we need to ensure that document has enough capacity. + // Important: It is possible that provided_doc is actually the internal 'doc' within the parser!!! + error_code _error = ensure_capacity(provided_doc, len); + if (_error) { return _error; } + if (realloc_if_needed) { + // Make sure we have enough capacity to copy len bytes + if (!loaded_bytes || _loaded_bytes_capacity < len) { + loaded_bytes.reset( internal::allocate_padded_buffer(len) ); + if (!loaded_bytes) { + return MEMALLOC; + } + _loaded_bytes_capacity = len; + } + std::memcpy(static_cast(loaded_bytes.get()), buf, len); + buf = reinterpret_cast(loaded_bytes.get()); + } + + if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { + buf += 3; + len -= 3; + } + _error = implementation->parse(buf, len, provided_doc); + + if (_error) { return _error; } + + return provided_doc.root(); +} + +simdjson_inline simdjson_result parser::parse_into_document(document& provided_doc, const char *buf, size_t len, bool realloc_if_needed) & noexcept { + return parse_into_document(provided_doc, reinterpret_cast(buf), len, realloc_if_needed); +} +simdjson_inline simdjson_result parser::parse_into_document(document& provided_doc, const std::string &s) & noexcept { + return parse_into_document(provided_doc, s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING); +} +simdjson_inline simdjson_result parser::parse_into_document(document& provided_doc, const padded_string &s) & noexcept { + return parse_into_document(provided_doc, s.data(), s.length(), false); +} + + +inline simdjson_result parser::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept { + return 
parse_into_document(doc, buf, len, realloc_if_needed); +} + +simdjson_inline simdjson_result parser::parse(const char *buf, size_t len, bool realloc_if_needed) & noexcept { + return parse(reinterpret_cast(buf), len, realloc_if_needed); +} +simdjson_inline simdjson_result parser::parse(const std::string &s) & noexcept { + return parse(s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING); +} +simdjson_inline simdjson_result parser::parse(const padded_string &s) & noexcept { + return parse(s.data(), s.length(), false); +} +simdjson_inline simdjson_result parser::parse(const padded_string_view &v) & noexcept { + return parse(v.data(), v.length(), false); +} + +inline simdjson_result parser::parse_many(const uint8_t *buf, size_t len, size_t batch_size) noexcept { + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { + buf += 3; + len -= 3; + } + return document_stream(*this, buf, len, batch_size); +} +inline simdjson_result parser::parse_many(const char *buf, size_t len, size_t batch_size) noexcept { + return parse_many(reinterpret_cast(buf), len, batch_size); +} +inline simdjson_result parser::parse_many(const std::string &s, size_t batch_size) noexcept { + return parse_many(s.data(), s.length(), batch_size); +} +inline simdjson_result parser::parse_many(const padded_string &s, size_t batch_size) noexcept { + return parse_many(s.data(), s.length(), batch_size); +} + +simdjson_inline size_t parser::capacity() const noexcept { + return implementation ? implementation->capacity() : 0; +} +simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_inline size_t parser::max_depth() const noexcept { + return implementation ? 
implementation->max_depth() : DEFAULT_MAX_DEPTH; +} + +simdjson_warn_unused +inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept { + // + // Reallocate implementation if needed + // + error_code err; + if (implementation) { + err = implementation->allocate(capacity, max_depth); + } else { + err = simdjson::get_active_implementation()->create_dom_parser_implementation(capacity, max_depth, implementation); + } + if (err) { return err; } + return SUCCESS; +} + +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +simdjson_warn_unused +inline bool parser::allocate_capacity(size_t capacity, size_t max_depth) noexcept { + return !allocate(capacity, max_depth); +} +#endif // SIMDJSON_DISABLE_DEPRECATED_API + +inline error_code parser::ensure_capacity(size_t desired_capacity) noexcept { + return ensure_capacity(doc, desired_capacity); +} + + +inline error_code parser::ensure_capacity(document& target_document, size_t desired_capacity) noexcept { + // 1. It is wasteful to allocate a document and a parser for documents spanning less than MINIMAL_DOCUMENT_CAPACITY bytes. + // 2. If we allow desired_capacity = 0 then it is possible to exit this function with implementation == nullptr. + if(desired_capacity < MINIMAL_DOCUMENT_CAPACITY) { desired_capacity = MINIMAL_DOCUMENT_CAPACITY; } + // If we don't have enough capacity, (try to) automatically bump it. + // If the document needs allocation, do it too. + // Both in one if statement to minimize unlikely branching. + // + // Note: we must make sure that this function is called if capacity() == 0. We do so because we + // ensure that desired_capacity > 0. + if (simdjson_unlikely(capacity() < desired_capacity || target_document.capacity() < desired_capacity)) { + if (desired_capacity > max_capacity()) { + return error = CAPACITY; + } + error_code err1 = target_document.capacity() < desired_capacity ? target_document.allocate(desired_capacity) : SUCCESS; + error_code err2 = capacity() < desired_capacity ? 
allocate(desired_capacity, max_depth()) : SUCCESS; + if(err1 != SUCCESS) { return error = err1; } + if(err2 != SUCCESS) { return error = err2; } + } + return SUCCESS; +} + +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity > MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = MINIMAL_DOCUMENT_CAPACITY; + } +} + +} // namespace dom +} // namespace simdjson + +#endif // SIMDJSON_PARSER_INL_H +/* end file simdjson/dom/parser-inl.h */ +/* skipped duplicate #include "simdjson/error-inl.h" */ +/* skipped duplicate #include "simdjson/internal/dom_parser_implementation.h" */ + +namespace simdjson { +namespace dom { + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. + std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); +} + +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); +} + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). 
+ if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); +} + + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } +} + +inline void stage1_worker::run(document_stream * ds, dom::parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); +} +#endif + +simdjson_inline document_stream::document_stream( + dom::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? 
MINIMAL_BATCH_SIZE : _batch_size}, + error{SUCCESS} +#ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change +#endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif +} + +simdjson_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + error{UNINITIALIZED} +#ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) +#endif +{ +} + +simdjson_inline document_stream::~document_stream() noexcept { +#ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); +#endif +} + +simdjson_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { +} + +simdjson_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. + return iterator(this, error == EMPTY); +} + +simdjson_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); +} + +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { +} + +simdjson_inline document_stream::iterator::reference document_stream::iterator::operator*() noexcept { + // Note that in case of error, we do not yet mark + // the iterator as "finished": this detection is done + // in the operator++ function since it is possible + // to call operator++ repeatedly while omitting + // calls to operator*. + if (stream->error) { return stream->error; } + return stream->parser->doc.root(); +} + +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) 
+ // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. + // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->error() is guarded against error conditions + // (it will immediately return if stream->error casts to false). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; +} + +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; +} + +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->ensure_capacity(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } +#ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread if needed + error = stage1_thread_parser.ensure_capacity(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } +#endif // SIMDJSON_THREADS_ENABLED + next(); +} + +simdjson_inline size_t document_stream::iterator::current_index() const noexcept { + 
return stream->doc_index; +} + +simdjson_inline std::string_view document_stream::iterator::source() const noexcept { + const char* start = reinterpret_cast(stream->buf) + current_index(); + bool object_or_array = ((*start == '[') || (*start == '{')); + if(object_or_array) { + size_t next_doc_index = stream->batch_start + stream->parser->implementation->structural_indexes[stream->parser->implementation->next_structural_index - 1]; + return std::string_view(start, next_doc_index - current_index() + 1); + } else { + size_t next_doc_index = stream->batch_start + stream->parser->implementation->structural_indexes[stream->parser->implementation->next_structural_index]; + size_t svlen = next_doc_index - current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { + svlen--; + } + return std::string_view(start, svlen); + } +} + + +inline void document_stream::next() noexcept { + // We always exit at once, once in an error condition. + if (error) { return; } + + // Load the next document from the batch + doc_index = batch_start + parser->implementation->structural_indexes[parser->implementation->next_structural_index]; + error = parser->implementation->stage2_next(parser->doc); + // If that was the last document in the batch, load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + +#ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } +#else + error = run_stage1(*parser, batch_start); +#endif + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. 
+ // Run stage 2 on the first document in the batch + doc_index = batch_start + parser->implementation->structural_indexes[parser->implementation->next_structural_index]; + error = parser->implementation->stage2_next(parser->doc); + } +} +inline size_t document_stream::size_in_bytes() const noexcept { + return len; +} + +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +} + +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; +} + +inline error_code document_stream::run_stage1(dom::parser &p, size_t _batch_start) noexcept { + size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); + } +} + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(*parser, stage1_thread_parser); + error = stage1_thread_error; + if (error) { return; } + + // If there's anything left, start the stage 1 thread! + if (next_batch_start() < len) { + start_stage1_thread(); + } +} + +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. 
+ this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); + + worker->run(this, & this->stage1_thread_parser, _next_batch_start); +} + +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace dom + +simdjson_inline simdjson_result::simdjson_result() noexcept + : simdjson_result_base() { +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : simdjson_result_base(error) { +} +simdjson_inline simdjson_result::simdjson_result(dom::document_stream &&value) noexcept + : simdjson_result_base(std::forward(value)) { +} + +#if SIMDJSON_EXCEPTIONS +simdjson_inline dom::document_stream::iterator simdjson_result::begin() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.begin(); +} +simdjson_inline dom::document_stream::iterator simdjson_result::end() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.end(); +} +#else // SIMDJSON_EXCEPTIONS +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +simdjson_inline dom::document_stream::iterator simdjson_result::begin() noexcept { + first.error = error(); + return first.begin(); +} +simdjson_inline dom::document_stream::iterator simdjson_result::end() noexcept { + first.error = error(); + return first.end(); +} +#endif // SIMDJSON_DISABLE_DEPRECATED_API +#endif // SIMDJSON_EXCEPTIONS + +} // namespace simdjson +#endif // SIMDJSON_DOCUMENT_STREAM_INL_H +/* end file simdjson/dom/document_stream-inl.h */ +/* including simdjson/dom/document-inl.h: #include "simdjson/dom/document-inl.h" */ +/* begin file simdjson/dom/document-inl.h */ +#ifndef SIMDJSON_DOCUMENT_INL_H +#define SIMDJSON_DOCUMENT_INL_H + +// Inline implementations go in here. 
+ +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* skipped duplicate #include "simdjson/dom/document.h" */ +/* skipped duplicate #include "simdjson/dom/element-inl.h" */ +/* skipped duplicate #include "simdjson/internal/tape_ref-inl.h" */ +/* including simdjson/internal/jsonformatutils.h: #include "simdjson/internal/jsonformatutils.h" */ +/* begin file simdjson/internal/jsonformatutils.h */ +#ifndef SIMDJSON_INTERNAL_JSONFORMATUTILS_H +#define SIMDJSON_INTERNAL_JSONFORMATUTILS_H + +/* skipped duplicate #include "simdjson/base.h" */ +#include +#include +#include + +namespace simdjson { +namespace internal { + +inline std::ostream& operator<<(std::ostream& out, const escape_json_string &str); + +class escape_json_string { +public: + escape_json_string(std::string_view _str) noexcept : str{_str} {} + operator std::string() const noexcept { std::stringstream s; s << *this; return s.str(); } +private: + std::string_view str; + friend std::ostream& operator<<(std::ostream& out, const escape_json_string &unescaped); +}; + +inline std::ostream& operator<<(std::ostream& out, const escape_json_string &unescaped) { + for (size_t i=0; i(unescaped.str[i]) <= 0x1F) { + // TODO can this be done once at the beginning, or will it mess up << char? 
+ std::ios::fmtflags f(out.flags()); + out << "\\u" << std::hex << std::setw(4) << std::setfill('0') << int(unescaped.str[i]); + out.flags(f); + } else { + out << unescaped.str[i]; + } + } + } + return out; +} + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_JSONFORMATUTILS_H +/* end file simdjson/internal/jsonformatutils.h */ + +#include + +namespace simdjson { +namespace dom { + +// +// document inline implementation +// +inline element document::root() const noexcept { + return element(internal::tape_ref(this, 1)); +} +simdjson_warn_unused +inline size_t document::capacity() const noexcept { + return allocated_capacity; +} + +simdjson_warn_unused +inline error_code document::allocate(size_t capacity) noexcept { + if (capacity == 0) { + string_buf.reset(); + tape.reset(); + allocated_capacity = 0; + return SUCCESS; + } + + // a pathological input like "[[[[..." would generate capacity tape elements, so + // need a capacity of at least capacity + 1, but it is also possible to do + // worse with "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6" + //where capacity + 1 tape elements are + // generated, see issue https://github.com/simdjson/simdjson/issues/345 + size_t tape_capacity = SIMDJSON_ROUNDUP_N(capacity + 3, 64); + // a document with only zero-length strings... could have capacity/3 string + // and we would need capacity/3 * 5 bytes on the string buffer + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset( new (std::nothrow) uint8_t[string_capacity]); + tape.reset(new (std::nothrow) uint64_t[tape_capacity]); + if(!(string_buf && tape)) { + allocated_capacity = 0; + string_buf.reset(); + tape.reset(); + return MEMALLOC; + } + // Technically the allocated_capacity might be larger than capacity + // so the next line is pessimistic. 
+ allocated_capacity = capacity; + return SUCCESS; +} + +inline bool document::dump_raw_tape(std::ostream &os) const noexcept { + uint32_t string_length; + size_t tape_idx = 0; + uint64_t tape_val = tape[tape_idx]; + uint8_t type = uint8_t(tape_val >> 56); + os << tape_idx << " : " << type; + tape_idx++; + size_t how_many = 0; + if (type == 'r') { + how_many = size_t(tape_val & internal::JSON_VALUE_MASK); + } else { + // Error: no starting root node? + return false; + } + os << "\t// pointing to " << how_many << " (right after last node)\n"; + uint64_t payload; + for (; tape_idx < how_many; tape_idx++) { + os << tape_idx << " : "; + tape_val = tape[tape_idx]; + payload = tape_val & internal::JSON_VALUE_MASK; + type = uint8_t(tape_val >> 56); + switch (type) { + case '"': // we have a string + os << "string \""; + std::memcpy(&string_length, string_buf.get() + payload, sizeof(uint32_t)); + os << internal::escape_json_string(std::string_view( + reinterpret_cast(string_buf.get() + payload + sizeof(uint32_t)), + string_length + )); + os << '"'; + os << '\n'; + break; + case 'l': // we have a long int + if (tape_idx + 1 >= how_many) { + return false; + } + os << "integer " << static_cast(tape[++tape_idx]) << "\n"; + break; + case 'u': // we have a long uint + if (tape_idx + 1 >= how_many) { + return false; + } + os << "unsigned integer " << tape[++tape_idx] << "\n"; + break; + case 'd': // we have a double + os << "float "; + if (tape_idx + 1 >= how_many) { + return false; + } + double answer; + std::memcpy(&answer, &tape[++tape_idx], sizeof(answer)); + os << answer << '\n'; + break; + case 'n': // we have a null + os << "null\n"; + break; + case 't': // we have a true + os << "true\n"; + break; + case 'f': // we have a false + os << "false\n"; + break; + case '{': // we have an object + os << "{\t// pointing to next tape location " << uint32_t(payload) + << " (first node after the scope), " + << " saturated count " + << ((payload >> 32) & internal::JSON_COUNT_MASK)<< 
"\n"; + break; case '}': // we end an object + os << "}\t// pointing to previous tape location " << uint32_t(payload) + << " (start of the scope)\n"; + break; + case '[': // we start an array + os << "[\t// pointing to next tape location " << uint32_t(payload) + << " (first node after the scope), " + << " saturated count " + << ((payload >> 32) & internal::JSON_COUNT_MASK)<< "\n"; + break; + case ']': // we end an array + os << "]\t// pointing to previous tape location " << uint32_t(payload) + << " (start of the scope)\n"; + break; + case 'r': // we start and end with the root node + // should we be hitting the root node? + return false; + default: + return false; + } + } + tape_val = tape[tape_idx]; + payload = tape_val & internal::JSON_VALUE_MASK; + type = uint8_t(tape_val >> 56); + os << tape_idx << " : " << type << "\t// pointing to " << payload + << " (start root)\n"; + return true; +} + +} // namespace dom +} // namespace simdjson + +#endif // SIMDJSON_DOCUMENT_INL_H +/* end file simdjson/dom/document-inl.h */ +/* skipped duplicate #include "simdjson/dom/element-inl.h" */ +/* skipped duplicate #include "simdjson/dom/object-inl.h" */ +/* skipped duplicate #include "simdjson/dom/parser-inl.h" */ +/* skipped duplicate #include "simdjson/internal/tape_ref-inl.h" */ +/* including simdjson/dom/serialization-inl.h: #include "simdjson/dom/serialization-inl.h" */ +/* begin file simdjson/dom/serialization-inl.h */ + +#ifndef SIMDJSON_SERIALIZATION_INL_H +#define SIMDJSON_SERIALIZATION_INL_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* skipped duplicate #include "simdjson/dom/serialization.h" */ +/* skipped duplicate #include "simdjson/dom/parser.h" */ +/* skipped duplicate #include "simdjson/internal/tape_type.h" */ + +/* skipped duplicate #include "simdjson/dom/array-inl.h" */ +/* skipped duplicate #include "simdjson/dom/object-inl.h" */ +/* skipped duplicate #include "simdjson/internal/tape_ref-inl.h" */ + +#include + +namespace simdjson { +namespace 
dom { +inline bool parser::print_json(std::ostream &os) const noexcept { + if (!valid) { return false; } + simdjson::internal::string_builder<> sb; + sb.append(doc.root()); + std::string_view answer = sb.str(); + os << answer; + return true; +} + +inline std::ostream& operator<<(std::ostream& out, simdjson::dom::element value) { + simdjson::internal::string_builder<> sb; + sb.append(value); + return (out << sb.str()); +} +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#endif +inline std::ostream& operator<<(std::ostream& out, simdjson::dom::array value) { + simdjson::internal::string_builder<> sb; + sb.append(value); + return (out << sb.str()); +} +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#endif +inline std::ostream& operator<<(std::ostream& out, simdjson::dom::object value) { + simdjson::internal::string_builder<> sb; + sb.append(value); + return (out << sb.str()); +} +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#endif + +} // namespace dom + +/*** + * Number utility functions + **/ +namespace { +/**@private + * Escape sequence like \b or \u0001 + * We expect that most compilers will use 8 bytes for this data structure. + **/ +struct escape_sequence { + uint8_t length; + const char string[7]; // technically, we only ever need 6 characters, we pad to 8 +}; +/**@private + * This converts a signed integer into a character sequence. + * The caller is responsible for providing enough memory (at least + * 20 characters.) 
+ * Though various runtime libraries provide itoa functions, + * it is not part of the C++ standard. The C++17 standard + * adds the to_chars functions which would do as well, but + * we want to support C++11. + */ +static char *fast_itoa(char *output, int64_t value) noexcept { + // This is a standard implementation of itoa. + char buffer[20]; + uint64_t value_positive; + // In general, negating a signed integer is unsafe. + if(value < 0) { + *output++ = '-'; + // Doing value_positive = -value; while avoiding + // undefined behavior warnings. + // It assumes two's complement which is universal at this + // point in time. + std::memcpy(&value_positive, &value, sizeof(value)); + value_positive = (~value_positive) + 1; // this is a negation + } else { + value_positive = value; + } + // We work solely with value_positive. It *might* be easier + // for an optimizing compiler to deal with an unsigned variable + // as far as performance goes. + const char *const end_buffer = buffer + 20; + char *write_pointer = buffer + 19; + // A faster approach is possible if we expect large integers: + // unroll the loop (work in 100s, 1000s) and use some kind of + // memoization. + while(value_positive >= 10) { + *write_pointer-- = char('0' + (value_positive % 10)); + value_positive /= 10; + } + *write_pointer = char('0' + value_positive); + size_t len = end_buffer - write_pointer; + std::memcpy(output, write_pointer, len); + return output + len; +} +/**@private + * This converts an unsigned integer into a character sequence. + * The caller is responsible for providing enough memory (at least + * 20 characters.) + * Though various runtime libraries provide itoa functions, + * it is not part of the C++ standard. The C++17 standard + * adds the to_chars functions which would do as well, but + * we want to support C++11. + */ +static char *fast_itoa(char *output, uint64_t value) noexcept { + // This is a standard implementation of itoa. + 
+ char buffer[20]; + const char *const end_buffer = buffer + 20; + char *write_pointer = buffer + 19; + // A faster approach is possible if we expect large integers: + // unroll the loop (work in 100s, 1000s) and use some kind of + // memoization. + while(value >= 10) { + *write_pointer-- = char('0' + (value % 10)); + value /= 10; + }; + *write_pointer = char('0' + value); + size_t len = end_buffer - write_pointer; + std::memcpy(output, write_pointer, len); + return output + len; +} + + +} // anonymous namespace +namespace internal { + +/*** + * Minifier/formatter code. + **/ + +template +simdjson_inline void base_formatter::number(uint64_t x) { + char number_buffer[24]; + char *newp = fast_itoa(number_buffer, x); + buffer.insert(buffer.end(), number_buffer, newp); +} + +template +simdjson_inline void base_formatter::number(int64_t x) { + char number_buffer[24]; + char *newp = fast_itoa(number_buffer, x); + buffer.insert(buffer.end(), number_buffer, newp); +} + +template +simdjson_inline void base_formatter::number(double x) { + char number_buffer[24]; + // Currently, passing the nullptr to the second argument is + // safe because our implementation does not check the second + // argument. 
+ char *newp = internal::to_chars(number_buffer, nullptr, x); + buffer.insert(buffer.end(), number_buffer, newp); +} + +template +simdjson_inline void base_formatter::start_array() { one_char('['); } + + +template +simdjson_inline void base_formatter::end_array() { one_char(']'); } + +template +simdjson_inline void base_formatter::start_object() { one_char('{'); } + +template +simdjson_inline void base_formatter::end_object() { one_char('}'); } + +template +simdjson_inline void base_formatter::comma() { one_char(','); } + +template +simdjson_inline void base_formatter::true_atom() { + const char * s = "true"; + buffer.insert(buffer.end(), s, s + 4); +} + +template +simdjson_inline void base_formatter::false_atom() { + const char * s = "false"; + buffer.insert(buffer.end(), s, s + 5); +} + +template +simdjson_inline void base_formatter::null_atom() { + const char * s = "null"; + buffer.insert(buffer.end(), s, s + 4); +} + +template +simdjson_inline void base_formatter::one_char(char c) { buffer.push_back(c); } + +template +simdjson_inline void base_formatter::key(std::string_view unescaped) { + string(unescaped); + one_char(':'); +} + +template +simdjson_inline void base_formatter::string(std::string_view unescaped) { + one_char('\"'); + size_t i = 0; + // Fast path for the case where we have no control character, no ", and no backslash. + // This should include most keys. + // + // We would like to use 'bool' but some compilers take offense to bitwise operation + // with bool types. 
+ constexpr static char needs_escaping[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + for(;i + 8 <= unescaped.length(); i += 8) { + // Poor's man vectorization. This could get much faster if we used SIMD. + // + // It is not the case that replacing '|' with '||' would be neutral performance-wise. + if(needs_escaping[uint8_t(unescaped[i])] | needs_escaping[uint8_t(unescaped[i+1])] + | needs_escaping[uint8_t(unescaped[i+2])] | needs_escaping[uint8_t(unescaped[i+3])] + | needs_escaping[uint8_t(unescaped[i+4])] | needs_escaping[uint8_t(unescaped[i+5])] + | needs_escaping[uint8_t(unescaped[i+6])] | needs_escaping[uint8_t(unescaped[i+7])] + ) { break; } + } + for(;i < unescaped.length(); i++) { + if(needs_escaping[uint8_t(unescaped[i])]) { break; } + } + // The following is also possible and omits a 256-byte table, but it is slower: + // for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F) + // && (unescaped[i] != '\"') && (unescaped[i] != '\\'); i++) {} + + // At least for long strings, the following should be fast. We could + // do better by integrating the checks and the insertion. + buffer.insert(buffer.end(), unescaped.data(), unescaped.data() + i); + // We caught a control character if we enter this loop (slow). 
+ // Note that we do not restart from the beginning, but rather we continue + // from the point where we encountered something that requires escaping. + for (; i < unescaped.length(); i++) { + switch (unescaped[i]) { + case '\"': + { + const char * s = "\\\""; + buffer.insert(buffer.end(), s, s + 2); + } + break; + case '\\': + { + const char * s = "\\\\"; + buffer.insert(buffer.end(), s, s + 2); + } + break; + default: + if (uint8_t(unescaped[i]) <= 0x1F) { + // If packed, this uses 8 * 32 bytes. + // Note that we expect most compilers to embed this code in the data + // section. + constexpr static escape_sequence escaped[32] = { + {6, "\\u0000"}, {6, "\\u0001"}, {6, "\\u0002"}, {6, "\\u0003"}, + {6, "\\u0004"}, {6, "\\u0005"}, {6, "\\u0006"}, {6, "\\u0007"}, + {2, "\\b"}, {2, "\\t"}, {2, "\\n"}, {6, "\\u000b"}, + {2, "\\f"}, {2, "\\r"}, {6, "\\u000e"}, {6, "\\u000f"}, + {6, "\\u0010"}, {6, "\\u0011"}, {6, "\\u0012"}, {6, "\\u0013"}, + {6, "\\u0014"}, {6, "\\u0015"}, {6, "\\u0016"}, {6, "\\u0017"}, + {6, "\\u0018"}, {6, "\\u0019"}, {6, "\\u001a"}, {6, "\\u001b"}, + {6, "\\u001c"}, {6, "\\u001d"}, {6, "\\u001e"}, {6, "\\u001f"}}; + auto u = escaped[uint8_t(unescaped[i])]; + buffer.insert(buffer.end(), u.string, u.string + u.length); + } else { + one_char(unescaped[i]); + } + } // switch + } // for + one_char('\"'); +} + + +template +inline void base_formatter::clear() { + buffer.clear(); +} + +template +simdjson_inline std::string_view base_formatter::str() const { + return std::string_view(buffer.data(), buffer.size()); +} + +simdjson_inline void mini_formatter::print_newline() { + return; +} + +simdjson_inline void mini_formatter::print_indents(size_t depth) { + (void)depth; + return; +} + +simdjson_inline void mini_formatter::print_space() { + return; +} + +simdjson_inline void pretty_formatter::print_newline() { + one_char('\n'); +} + +simdjson_inline void pretty_formatter::print_indents(size_t depth) { + if(this->indent_step <= 0) { + return; + } + 
for(size_t i = 0; i < this->indent_step * depth; i++) { + one_char(' '); + } +} + +simdjson_inline void pretty_formatter::print_space() { + one_char(' '); +} + +/*** + * String building code. + **/ + +template +inline void string_builder::append(simdjson::dom::element value) { + // using tape_type = simdjson::internal::tape_type; + size_t depth = 0; + constexpr size_t MAX_DEPTH = 16; + bool is_object[MAX_DEPTH]; + is_object[0] = false; + bool after_value = false; + + internal::tape_ref iter(value.tape); + do { + // print commas after each value + if (after_value) { + format.comma(); + format.print_newline(); + } + + format.print_indents(depth); + + // If we are in an object, print the next key and :, and skip to the next + // value. + if (is_object[depth]) { + format.key(iter.get_string_view()); + format.print_space(); + iter.json_index++; + } + switch (iter.tape_ref_type()) { + + // Arrays + case tape_type::START_ARRAY: { + // If we're too deep, we need to recurse to go deeper. + depth++; + if (simdjson_unlikely(depth >= MAX_DEPTH)) { + append(simdjson::dom::array(iter)); + iter.json_index = iter.matching_brace_index() - 1; // Jump to the ] + depth--; + break; + } + + // Output start [ + format.start_array(); + iter.json_index++; + + // Handle empty [] (we don't want to come back around and print commas) + if (iter.tape_ref_type() == tape_type::END_ARRAY) { + format.end_array(); + depth--; + break; + } + + is_object[depth] = false; + after_value = false; + format.print_newline(); + continue; + } + + // Objects + case tape_type::START_OBJECT: { + // If we're too deep, we need to recurse to go deeper. 
+ depth++; + if (simdjson_unlikely(depth >= MAX_DEPTH)) { + append(simdjson::dom::object(iter)); + iter.json_index = iter.matching_brace_index() - 1; // Jump to the } + depth--; + break; + } + + // Output start { + format.start_object(); + iter.json_index++; + + // Handle empty {} (we don't want to come back around and print commas) + if (iter.tape_ref_type() == tape_type::END_OBJECT) { + format.end_object(); + depth--; + break; + } + + is_object[depth] = true; + after_value = false; + format.print_newline(); + continue; + } + + // Scalars + case tape_type::STRING: + format.string(iter.get_string_view()); + break; + case tape_type::INT64: + format.number(iter.next_tape_value()); + iter.json_index++; // numbers take up 2 spots, so we need to increment + // extra + break; + case tape_type::UINT64: + format.number(iter.next_tape_value()); + iter.json_index++; // numbers take up 2 spots, so we need to increment + // extra + break; + case tape_type::DOUBLE: + format.number(iter.next_tape_value()); + iter.json_index++; // numbers take up 2 spots, so we need to increment + // extra + break; + case tape_type::TRUE_VALUE: + format.true_atom(); + break; + case tape_type::FALSE_VALUE: + format.false_atom(); + break; + case tape_type::NULL_VALUE: + format.null_atom(); + break; + + // These are impossible + case tape_type::END_ARRAY: + case tape_type::END_OBJECT: + case tape_type::ROOT: + SIMDJSON_UNREACHABLE(); + } + iter.json_index++; + after_value = true; + + // Handle multiple ends in a row + while (depth != 0 && (iter.tape_ref_type() == tape_type::END_ARRAY || + iter.tape_ref_type() == tape_type::END_OBJECT)) { + format.print_newline(); + depth--; + format.print_indents(depth); + if (iter.tape_ref_type() == tape_type::END_ARRAY) { + format.end_array(); + } else { + format.end_object(); + } + iter.json_index++; + } + + // Stop when we're at depth 0 + } while (depth != 0); + + format.print_newline(); +} + +template +inline void string_builder::append(simdjson::dom::object 
value) { + format.start_object(); + auto pair = value.begin(); + auto end = value.end(); + if (pair != end) { + append(*pair); + for (++pair; pair != end; ++pair) { + format.comma(); + append(*pair); + } + } + format.end_object(); +} + +template +inline void string_builder::append(simdjson::dom::array value) { + format.start_array(); + auto iter = value.begin(); + auto end = value.end(); + if (iter != end) { + append(*iter); + for (++iter; iter != end; ++iter) { + format.comma(); + append(*iter); + } + } + format.end_array(); +} + +template +simdjson_inline void string_builder::append(simdjson::dom::key_value_pair kv) { + format.key(kv.key); + append(kv.value); +} + +template +simdjson_inline void string_builder::clear() { + format.clear(); +} + +template +simdjson_inline std::string_view string_builder::str() const { + return format.str(); +} + + +} // namespace internal +} // namespace simdjson + +#endif +/* end file simdjson/dom/serialization-inl.h */ + +#endif // SIMDJSON_DOM_H +/* end file simdjson/dom.h */ +/* including simdjson/ondemand.h: #include "simdjson/ondemand.h" */ +/* begin file simdjson/ondemand.h */ +#ifndef SIMDJSON_ONDEMAND_H +#define SIMDJSON_ONDEMAND_H + +/* including simdjson/builtin/ondemand.h: #include "simdjson/builtin/ondemand.h" */ +/* begin file simdjson/builtin/ondemand.h */ +#ifndef SIMDJSON_BUILTIN_ONDEMAND_H +#define SIMDJSON_BUILTIN_ONDEMAND_H + +/* including simdjson/builtin.h: #include "simdjson/builtin.h" */ +/* begin file simdjson/builtin.h */ +#ifndef SIMDJSON_BUILTIN_H +#define SIMDJSON_BUILTIN_H + +/* including simdjson/builtin/base.h: #include "simdjson/builtin/base.h" */ +/* begin file simdjson/builtin/base.h */ +#ifndef SIMDJSON_BUILTIN_BASE_H +#define SIMDJSON_BUILTIN_BASE_H + +/* skipped duplicate #include "simdjson/base.h" */ +/* including simdjson/implementation_detection.h: #include "simdjson/implementation_detection.h" */ +/* begin file simdjson/implementation_detection.h */ +#ifndef 
SIMDJSON_IMPLEMENTATION_DETECTION_H +#define SIMDJSON_IMPLEMENTATION_DETECTION_H + +/* skipped duplicate #include "simdjson/base.h" */ + +// 0 is reserved, because undefined SIMDJSON_IMPLEMENTATION equals 0 in preprocessor macros. +#define SIMDJSON_IMPLEMENTATION_ID_arm64 1 +#define SIMDJSON_IMPLEMENTATION_ID_fallback 2 +#define SIMDJSON_IMPLEMENTATION_ID_haswell 3 +#define SIMDJSON_IMPLEMENTATION_ID_icelake 4 +#define SIMDJSON_IMPLEMENTATION_ID_ppc64 5 +#define SIMDJSON_IMPLEMENTATION_ID_westmere 6 +#define SIMDJSON_IMPLEMENTATION_ID_lsx 7 +#define SIMDJSON_IMPLEMENTATION_ID_lasx 8 + +#define SIMDJSON_IMPLEMENTATION_ID_FOR(IMPL) SIMDJSON_CAT(SIMDJSON_IMPLEMENTATION_ID_, IMPL) +#define SIMDJSON_IMPLEMENTATION_ID SIMDJSON_IMPLEMENTATION_ID_FOR(SIMDJSON_IMPLEMENTATION) + +#define SIMDJSON_IMPLEMENTATION_IS(IMPL) SIMDJSON_IMPLEMENTATION_ID == SIMDJSON_IMPLEMENTATION_ID_FOR(IMPL) + +// +// First, figure out which implementations can be run. Doing it here makes it so we don't have to worry about the order +// in which we include them. +// + +#ifndef SIMDJSON_IMPLEMENTATION_ARM64 +#define SIMDJSON_IMPLEMENTATION_ARM64 (SIMDJSON_IS_ARM64) +#endif +#if SIMDJSON_IMPLEMENTATION_ARM64 && SIMDJSON_IS_ARM64 +#define SIMDJSON_CAN_ALWAYS_RUN_ARM64 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_ARM64 0 +#endif + +// Default Icelake to on if this is x86-64. Even if we're not compiled for it, it could be selected +// at runtime. 
+#ifndef SIMDJSON_IMPLEMENTATION_ICELAKE +#define SIMDJSON_IMPLEMENTATION_ICELAKE ((SIMDJSON_IS_X86_64) && (SIMDJSON_AVX512_ALLOWED) && (SIMDJSON_COMPILER_SUPPORTS_VBMI2)) +#endif + +#ifdef _MSC_VER +// To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see +// https://github.com/simdjson/simdjson/issues/1247 +#if ((SIMDJSON_IMPLEMENTATION_ICELAKE) && (__AVX2__) && (__AVX512F__) && (__AVX512DQ__) && (__AVX512CD__) && (__AVX512BW__) && (__AVX512VL__) && (__AVX512VBMI2__)) +#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE 0 +#endif + +#else + +#if ((SIMDJSON_IMPLEMENTATION_ICELAKE) && (__AVX2__) && (__BMI__) && (__PCLMUL__) && (__LZCNT__) && (__AVX512F__) && (__AVX512DQ__) && (__AVX512CD__) && (__AVX512BW__) && (__AVX512VL__) && (__AVX512VBMI2__)) +#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE 0 +#endif + +#endif + +// Default Haswell to on if this is x86-64. Even if we're not compiled for it, it could be selected +// at runtime. +#ifndef SIMDJSON_IMPLEMENTATION_HASWELL +#if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +// if icelake is always available, never enable haswell. +#define SIMDJSON_IMPLEMENTATION_HASWELL 0 +#else +#define SIMDJSON_IMPLEMENTATION_HASWELL SIMDJSON_IS_X86_64 +#endif +#endif +#ifdef _MSC_VER +// To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see +// https://github.com/simdjson/simdjson/issues/1247 +#if ((SIMDJSON_IMPLEMENTATION_HASWELL) && (SIMDJSON_IS_X86_64) && (__AVX2__)) +#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL 0 +#endif + +#else + +#if ((SIMDJSON_IMPLEMENTATION_HASWELL) && (SIMDJSON_IS_X86_64) && (__AVX2__) && (__BMI__) && (__PCLMUL__) && (__LZCNT__)) +#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL 0 +#endif + +#endif + +// Default Westmere to on if this is x86-64. 
+#ifndef SIMDJSON_IMPLEMENTATION_WESTMERE +#if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE || SIMDJSON_CAN_ALWAYS_RUN_HASWELL +// if icelake or haswell are always available, never enable westmere. +#define SIMDJSON_IMPLEMENTATION_WESTMERE 0 +#else +#define SIMDJSON_IMPLEMENTATION_WESTMERE SIMDJSON_IS_X86_64 +#endif +#endif + +#if (SIMDJSON_IMPLEMENTATION_WESTMERE && SIMDJSON_IS_X86_64 && __SSE4_2__ && __PCLMUL__) +#define SIMDJSON_CAN_ALWAYS_RUN_WESTMERE 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_WESTMERE 0 +#endif + + +#ifndef SIMDJSON_IMPLEMENTATION_PPC64 +#define SIMDJSON_IMPLEMENTATION_PPC64 (SIMDJSON_IS_PPC64 && SIMDJSON_IS_PPC64_VMX) +#endif +#if SIMDJSON_IMPLEMENTATION_PPC64 && SIMDJSON_IS_PPC64 && SIMDJSON_IS_PPC64_VMX +#define SIMDJSON_CAN_ALWAYS_RUN_PPC64 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_PPC64 0 +#endif + +#ifndef SIMDJSON_IMPLEMENTATION_LASX +#define SIMDJSON_IMPLEMENTATION_LASX (SIMDJSON_IS_LOONGARCH64 && __loongarch_asx) +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_LASX (SIMDJSON_IMPLEMENTATION_LASX) + +#ifndef SIMDJSON_IMPLEMENTATION_LSX +#if SIMDJSON_CAN_ALWAYS_RUN_LASX +#define SIMDJSON_IMPLEMENTATION_LSX 0 +#else +#define SIMDJSON_IMPLEMENTATION_LSX (SIMDJSON_IS_LOONGARCH64 && __loongarch_sx) +#endif +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_LSX (SIMDJSON_IMPLEMENTATION_LSX) + +// Default Fallback to on unless a builtin implementation has already been selected. +#ifndef SIMDJSON_IMPLEMENTATION_FALLBACK +#if SIMDJSON_CAN_ALWAYS_RUN_ARM64 || SIMDJSON_CAN_ALWAYS_RUN_ICELAKE || SIMDJSON_CAN_ALWAYS_RUN_HASWELL || SIMDJSON_CAN_ALWAYS_RUN_WESTMERE || SIMDJSON_CAN_ALWAYS_RUN_PPC64 || SIMDJSON_CAN_ALWAYS_RUN_LSX || SIMDJSON_CAN_ALWAYS_RUN_LASX +// if anything at all except fallback can always run, then disable fallback. 
+#define SIMDJSON_IMPLEMENTATION_FALLBACK 0 +#else +#define SIMDJSON_IMPLEMENTATION_FALLBACK 1 +#endif +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_FALLBACK SIMDJSON_IMPLEMENTATION_FALLBACK + +// Determine the best builtin implementation +#ifndef SIMDJSON_BUILTIN_IMPLEMENTATION + +#if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +#define SIMDJSON_BUILTIN_IMPLEMENTATION icelake +#elif SIMDJSON_CAN_ALWAYS_RUN_HASWELL +#define SIMDJSON_BUILTIN_IMPLEMENTATION haswell +#elif SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +#define SIMDJSON_BUILTIN_IMPLEMENTATION westmere +#elif SIMDJSON_CAN_ALWAYS_RUN_ARM64 +#define SIMDJSON_BUILTIN_IMPLEMENTATION arm64 +#elif SIMDJSON_CAN_ALWAYS_RUN_PPC64 +#define SIMDJSON_BUILTIN_IMPLEMENTATION ppc64 +#elif SIMDJSON_CAN_ALWAYS_RUN_LSX +#define SIMDJSON_BUILTIN_IMPLEMENTATION lsx +#elif SIMDJSON_CAN_ALWAYS_RUN_LASX +#define SIMDJSON_BUILTIN_IMPLEMENTATION lasx +#elif SIMDJSON_CAN_ALWAYS_RUN_FALLBACK +#define SIMDJSON_BUILTIN_IMPLEMENTATION fallback +#else +#error "All possible implementations (including fallback) have been disabled! simdjson will not run." 
+#endif + +#endif // SIMDJSON_BUILTIN_IMPLEMENTATION + +#define SIMDJSON_BUILTIN_IMPLEMENTATION_ID SIMDJSON_IMPLEMENTATION_ID_FOR(SIMDJSON_BUILTIN_IMPLEMENTATION) +#define SIMDJSON_BUILTIN_IMPLEMENTATION_IS(IMPL) SIMDJSON_BUILTIN_IMPLEMENTATION_ID == SIMDJSON_IMPLEMENTATION_ID_FOR(IMPL) + +#endif // SIMDJSON_IMPLEMENTATION_DETECTION_H +/* end file simdjson/implementation_detection.h */ + +namespace simdjson { +#if SIMDJSON_BUILTIN_IMPLEMENTATION_IS(arm64) + namespace arm64 {} +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(fallback) + namespace fallback {} +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(haswell) + namespace haswell {} +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(icelake) + namespace icelake {} +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(ppc64) + namespace ppc64 {} +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(westmere) + namespace westmere {} +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(lsx) + namespace lsx {} +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(lasx) + namespace lasx {} +#else +#error Unknown SIMDJSON_BUILTIN_IMPLEMENTATION +#endif + + /** + * Represents the best statically linked simdjson implementation that can be used by the compiling + * program. + * + * Detects what options the program is compiled against, and picks the minimum implementation that + * will work on any computer that can run the program. For example, if you compile with g++ + * -march=westmere, it will pick the westmere implementation. The haswell implementation will + * still be available, and can be selected at runtime, but the builtin implementation (and any + * code that uses it) will use westmere. 
+ */ + namespace builtin = SIMDJSON_BUILTIN_IMPLEMENTATION; +} // namespace simdjson + +#endif // SIMDJSON_BUILTIN_BASE_H +/* end file simdjson/builtin/base.h */ +/* including simdjson/builtin/implementation.h: #include "simdjson/builtin/implementation.h" */ +/* begin file simdjson/builtin/implementation.h */ +#ifndef SIMDJSON_BUILTIN_IMPLEMENTATION_H +#define SIMDJSON_BUILTIN_IMPLEMENTATION_H + +/* skipped duplicate #include "simdjson/builtin/base.h" */ + +/* including simdjson/generic/dependencies.h: #include "simdjson/generic/dependencies.h" */ +/* begin file simdjson/generic/dependencies.h */ +#ifdef SIMDJSON_CONDITIONAL_INCLUDE +#error simdjson/generic/dependencies.h must be included before defining SIMDJSON_CONDITIONAL_INCLUDE! +#endif + +#ifndef SIMDJSON_GENERIC_DEPENDENCIES_H +#define SIMDJSON_GENERIC_DEPENDENCIES_H + +// Internal headers needed for generics. +// All includes referencing simdjson headers *not* under simdjson/generic must be here! +// Otherwise, amalgamation will fail. +/* skipped duplicate #include "simdjson/base.h" */ +/* skipped duplicate #include "simdjson/implementation.h" */ +/* skipped duplicate #include "simdjson/implementation_detection.h" */ +/* including simdjson/internal/instruction_set.h: #include "simdjson/internal/instruction_set.h" */ +/* begin file simdjson/internal/instruction_set.h */ +/* From +https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h +Highly modified. 
+ +Copyright (c) 2016- Facebook, Inc (Adam Paszke) +Copyright (c) 2014- Facebook, Inc (Soumith Chintala) +Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) +Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) +Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) +Copyright (c) 2011-2013 NYU (Clement Farabet) +Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, +Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute +(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, +Samy Bengio, Johnny Mariethoz) + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories +America and IDIAP Research Institute nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef SIMDJSON_INTERNAL_INSTRUCTION_SET_H +#define SIMDJSON_INTERNAL_INSTRUCTION_SET_H + +namespace simdjson { +namespace internal { + +enum instruction_set { + DEFAULT = 0x0, + NEON = 0x1, + AVX2 = 0x4, + SSE42 = 0x8, + PCLMULQDQ = 0x10, + BMI1 = 0x20, + BMI2 = 0x40, + ALTIVEC = 0x80, + AVX512F = 0x100, + AVX512DQ = 0x200, + AVX512IFMA = 0x400, + AVX512PF = 0x800, + AVX512ER = 0x1000, + AVX512CD = 0x2000, + AVX512BW = 0x4000, + AVX512VL = 0x8000, + AVX512VBMI2 = 0x10000, + LSX = 0x20000, + LASX = 0x40000, +}; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_INSTRUCTION_SET_H +/* end file simdjson/internal/instruction_set.h */ +/* skipped duplicate #include "simdjson/internal/dom_parser_implementation.h" */ +/* including simdjson/internal/jsoncharutils_tables.h: #include "simdjson/internal/jsoncharutils_tables.h" */ +/* begin file simdjson/internal/jsoncharutils_tables.h */ +#ifndef SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H +#define SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H + +/* skipped duplicate #include "simdjson/base.h" */ + +#ifdef JSON_TEST_STRINGS +void found_string(const uint8_t *buf, const uint8_t *parsed_begin, + const uint8_t *parsed_end); +void found_bad_string(const uint8_t *buf); +#endif + +namespace simdjson { +namespace internal { +// structural chars here are +// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c (and NULL) +// we are also interested in the four whitespace characters +// space 
0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d + +extern SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace_negated[256]; +extern SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace[256]; +extern SIMDJSON_DLLIMPORTEXPORT const uint32_t digit_to_val32[886]; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H +/* end file simdjson/internal/jsoncharutils_tables.h */ +/* including simdjson/internal/numberparsing_tables.h: #include "simdjson/internal/numberparsing_tables.h" */ +/* begin file simdjson/internal/numberparsing_tables.h */ +#ifndef SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H +#define SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H + +/* skipped duplicate #include "simdjson/base.h" */ + +namespace simdjson { +namespace internal { +/** + * The smallest non-zero float (binary64) is 2^-1074. + * We take as input numbers of the form w x 10^q where w < 2^64. + * We have that w * 10^-343 < 2^(64-344) 5^-343 < 2^-1076. + * However, we have that + * (2^64-1) * 10^-342 = (2^64-1) * 2^-342 * 5^-342 > 2^-1074. + * Thus it is possible for a number of the form w * 10^-342 where + * w is a 64-bit value to be a non-zero floating-point number. + ********* + * Any number of form w * 10^309 where w>= 1 is going to be + * infinite in binary64 so we never need to worry about powers + * of 5 greater than 308. + */ +constexpr int smallest_power = -342; +constexpr int largest_power = 308; + +/** + * Represents a 128-bit value. + * low: least significant 64 bits. + * high: most significant 64 bits. + */ +struct value128 { + uint64_t low; + uint64_t high; +}; + + +// Precomputed powers of ten from 10^0 to 10^22. These +// can be represented exactly using the double type. 
+extern SIMDJSON_DLLIMPORTEXPORT const double power_of_ten[]; + + +/** + * When mapping numbers from decimal to binary, + * we go from w * 10^q to m * 2^p but we have + * 10^q = 5^q * 2^q, so effectively + * we are trying to match + * w * 2^q * 5^q to m * 2^p. Thus the powers of two + * are not a concern since they can be represented + * exactly using the binary notation, only the powers of five + * affect the binary significand. + */ + + +// The truncated powers of five from 5^-342 all the way to 5^308 +// The mantissa is truncated to 128 bits, and +// never rounded up. Uses about 10KB. +extern SIMDJSON_DLLIMPORTEXPORT const uint64_t power_of_five_128[]; +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H +/* end file simdjson/internal/numberparsing_tables.h */ +/* including simdjson/internal/simdprune_tables.h: #include "simdjson/internal/simdprune_tables.h" */ +/* begin file simdjson/internal/simdprune_tables.h */ +#ifndef SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H +#define SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H + +/* skipped duplicate #include "simdjson/base.h" */ + +#include <cstdint> + +namespace simdjson { // table modified and copied from +namespace internal { // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetTable + +extern SIMDJSON_DLLIMPORTEXPORT const unsigned char BitsSetTable256mul2[256]; + +extern SIMDJSON_DLLIMPORTEXPORT const uint8_t pshufb_combine_table[272]; + +// 256 * 8 bytes = 2kB, easily fits in cache.
+extern SIMDJSON_DLLIMPORTEXPORT const uint64_t thintable_epi8[256]; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H +/* end file simdjson/internal/simdprune_tables.h */ + +#endif // SIMDJSON_GENERIC_DEPENDENCIES_H +/* end file simdjson/generic/dependencies.h */ + +/* defining SIMDJSON_CONDITIONAL_INCLUDE */ +#define SIMDJSON_CONDITIONAL_INCLUDE + +#if SIMDJSON_BUILTIN_IMPLEMENTATION_IS(arm64) +/* including simdjson/arm64/implementation.h: #include "simdjson/arm64/implementation.h" */ +/* begin file simdjson/arm64/implementation.h */ +#ifndef SIMDJSON_ARM64_IMPLEMENTATION_H +#define SIMDJSON_ARM64_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { + +/** + * Implementation object for the "arm64" kernel: hands its name, description and required + * instruction set (NEON) to the base class and declares the parser-factory, minify and + * UTF-8 validation members. + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("arm64", "ARM NEON", internal::instruction_set::NEON) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr<internal::dom_parser_implementation>& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_IMPLEMENTATION_H +/* end file simdjson/arm64/implementation.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(fallback) +/* including simdjson/fallback/implementation.h: #include
"simdjson/fallback/implementation.h" */ +/* begin file simdjson/fallback/implementation.h */ +#ifndef SIMDJSON_FALLBACK_IMPLEMENTATION_H +#define SIMDJSON_FALLBACK_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { + +/** + * Implementation object for the "fallback" kernel: hands its name and description to the + * base class with 0 as the required instruction sets, and declares the parser-factory, + * minify and UTF-8 validation members. + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation( + "fallback", + "Generic fallback implementation", + 0 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr<internal::dom_parser_implementation>& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_IMPLEMENTATION_H +/* end file simdjson/fallback/implementation.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(haswell) +/* including simdjson/haswell/implementation.h: #include "simdjson/haswell/implementation.h" */ +/* begin file simdjson/haswell/implementation.h */ +#ifndef SIMDJSON_HASWELL_IMPLEMENTATION_H +#define SIMDJSON_HASWELL_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE
*/ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL +namespace simdjson { +namespace haswell { + +/** + * Implementation object for the "haswell" kernel: hands its name, description and required + * instruction sets (AVX2, PCLMULQDQ, BMI1, BMI2) to the base class and declares the + * parser-factory, minify and UTF-8 validation members. + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation( + "haswell", + "Intel/AMD AVX2", + internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr<internal::dom_parser_implementation>& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_IMPLEMENTATION_H +/* end file simdjson/haswell/implementation.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(icelake) +/* including simdjson/icelake/implementation.h: #include "simdjson/icelake/implementation.h" */ +/* begin file simdjson/icelake/implementation.h */ +#ifndef SIMDJSON_ICELAKE_IMPLEMENTATION_H +#define SIMDJSON_ICELAKE_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE +namespace simdjson { +namespace icelake { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() :
simdjson::implementation( + "icelake", + "Intel/AMD AVX512", + internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 | internal::instruction_set::AVX512F | internal::instruction_set::AVX512DQ | internal::instruction_set::AVX512CD | internal::instruction_set::AVX512BW | internal::instruction_set::AVX512VL | internal::instruction_set::AVX512VBMI2 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr<internal::dom_parser_implementation>& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_IMPLEMENTATION_H +/* end file simdjson/icelake/implementation.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(ppc64) +/* including simdjson/ppc64/implementation.h: #include "simdjson/ppc64/implementation.h" */ +/* begin file simdjson/ppc64/implementation.h */ +#ifndef SIMDJSON_PPC64_IMPLEMENTATION_H +#define SIMDJSON_PPC64_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +/** + * Implementation for ALTIVEC (PPC64).
+ */ +namespace ppc64 { + +/** + * Implementation object for the "ppc64" kernel: hands its name, description and required + * instruction set (ALTIVEC) to the base class and declares the parser-factory, minify and + * UTF-8 validation members. + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() + : simdjson::implementation("ppc64", "PPC64 ALTIVEC", + internal::instruction_set::ALTIVEC) {} + + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, size_t max_length, + std::unique_ptr<internal::dom_parser_implementation> &dst) + const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, + uint8_t *dst, + size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, + size_t len) const noexcept final; +}; + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_IMPLEMENTATION_H +/* end file simdjson/ppc64/implementation.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(westmere) +/* including simdjson/westmere/implementation.h: #include "simdjson/westmere/implementation.h" */ +/* begin file simdjson/westmere/implementation.h */ +#ifndef SIMDJSON_WESTMERE_IMPLEMENTATION_H +#define SIMDJSON_WESTMERE_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +namespace westmere { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("westmere", "Intel/AMD SSE4.2", internal::instruction_set::SSE42 | internal::instruction_set::PCLMULQDQ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t
max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_IMPLEMENTATION_H +/* end file simdjson/westmere/implementation.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(lsx) +/* including simdjson/lsx/implementation.h: #include "simdjson/lsx/implementation.h" */ +/* begin file simdjson/lsx/implementation.h */ +#ifndef SIMDJSON_LSX_IMPLEMENTATION_H +#define SIMDJSON_LSX_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("lsx", "LoongArch SX", internal::instruction_set::LSX) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_IMPLEMENTATION_H +/* end file simdjson/lsx/implementation.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(lasx) +/* including simdjson/lasx/implementation.h: #include 
"simdjson/lasx/implementation.h" */ +/* begin file simdjson/lasx/implementation.h */ +#ifndef SIMDJSON_LASX_IMPLEMENTATION_H +#define SIMDJSON_LASX_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("lasx", "LoongArch ASX", internal::instruction_set::LASX) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_IMPLEMENTATION_H +/* end file simdjson/lasx/implementation.h */ +#else +#error Unknown SIMDJSON_BUILTIN_IMPLEMENTATION +#endif + +/* undefining SIMDJSON_CONDITIONAL_INCLUDE */ +#undef SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { + /** + * Function which returns a pointer to an implementation matching the "builtin" implementation. + * The builtin implementation is the best statically linked simdjson implementation that can be used by the compiling + * program. If you compile with g++ -march=haswell, this will return the haswell implementation. + * It is handy to be able to check what builtin was used: builtin_implementation()->name(). 
+ */ + const implementation * builtin_implementation(); +} // namespace simdjson + +#endif // SIMDJSON_BUILTIN_IMPLEMENTATION_H +/* end file simdjson/builtin/implementation.h */ + +/* skipped duplicate #include "simdjson/generic/dependencies.h" */ + +/* defining SIMDJSON_CONDITIONAL_INCLUDE */ +#define SIMDJSON_CONDITIONAL_INCLUDE + +#if SIMDJSON_BUILTIN_IMPLEMENTATION_IS(arm64) +/* including simdjson/arm64.h: #include "simdjson/arm64.h" */ +/* begin file simdjson/arm64.h */ +#ifndef SIMDJSON_ARM64_H +#define SIMDJSON_ARM64_H + +/* including simdjson/arm64/begin.h: #include "simdjson/arm64/begin.h" */ +/* begin file simdjson/arm64/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "arm64" */ +#define SIMDJSON_IMPLEMENTATION arm64 +/* including simdjson/arm64/base.h: #include "simdjson/arm64/base.h" */ +/* begin file simdjson/arm64/base.h */ +#ifndef SIMDJSON_ARM64_BASE_H +#define SIMDJSON_ARM64_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Implementation for NEON (ARMv8). 
+ */ +namespace arm64 { + +class implementation; + +namespace { +namespace simd { +// Forward declarations of the SIMD wrapper templates. +template <typename T> struct simd8; +template <typename T> struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_BASE_H +/* end file simdjson/arm64/base.h */ +/* including simdjson/arm64/intrinsics.h: #include "simdjson/arm64/intrinsics.h" */ +/* begin file simdjson/arm64/intrinsics.h */ +#ifndef SIMDJSON_ARM64_INTRINSICS_H +#define SIMDJSON_ARM64_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. +#include <arm_neon.h> + +static_assert(sizeof(uint8x16_t) <= simdjson::SIMDJSON_PADDING, "insufficient padding for arm64"); + +#endif // SIMDJSON_ARM64_INTRINSICS_H +/* end file simdjson/arm64/intrinsics.h */ +/* including simdjson/arm64/bitmanipulation.h: #include "simdjson/arm64/bitmanipulation.h" */ +/* begin file simdjson/arm64/bitmanipulation.h */ +#ifndef SIMDJSON_ARM64_BITMANIPULATION_H +#define SIMDJSON_ARM64_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { + +// We sometimes call trailing_zeroes on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized.
+// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* clears the lowest set bit; well defined for zero, where it yields zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero (the Visual Studio path returns 64) */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* counts the set bits of input_num (NEON per-byte popcount, then a horizontal add); presumably well defined for zero */ +simdjson_inline int count_ones(uint64_t input_num) { + return vaddv_u8(vcnt_u8(vcreate_u8(input_num))); +} + + +#if defined(__GNUC__) // catches clang and gcc +/** + * ARM has a fast 64-bit "bit reversal function" that is handy. However, + * it is not generally available as an intrinsic function under Visual + * Studio (though this might be changing). Even under clang/gcc, we + * apparently need to invoke inline assembly. + */ +/* + * We use SIMDJSON_PREFER_REVERSE_BITS as a hint that algorithms that + * work well with bit reversal may use it.
+ */ +#define SIMDJSON_PREFER_REVERSE_BITS 1 + +/* reverse the bits */ +simdjson_inline uint64_t reverse_bits(uint64_t input_num) { + uint64_t rev_bits; + __asm("rbit %0, %1" : "=r"(rev_bits) : "r"(input_num)); + return rev_bits; +} + +/** + * Flips bit at index 63 - lz. Thus if you have 'leading_zeroes' leading zeroes, + * then this will set to zero the leading bit. It is possible for leading_zeroes to be + * greating or equal to 63 in which case we trigger undefined behavior, but the output + * of such undefined behavior is never used. + **/ +SIMDJSON_NO_SANITIZE_UNDEFINED +simdjson_inline uint64_t zero_leading_bit(uint64_t rev_bits, int leading_zeroes) { + return rev_bits ^ (uint64_t(0x8000000000000000) >> leading_zeroes); +} + +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + *result = value1 + value2; + return *result < value1; +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_BITMANIPULATION_H +/* end file simdjson/arm64/bitmanipulation.h */ +/* including simdjson/arm64/bitmask.h: #include "simdjson/arm64/bitmask.h" */ +/* begin file simdjson/arm64/bitmask.h */ +#ifndef SIMDJSON_ARM64_BITMASK_H +#define SIMDJSON_ARM64_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + ///////////// + // We could do this with PMULL, but it is apparently slow. 
+ // + //#ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension + //return vmull_p64(-1ULL, bitmask); + //#else + // Analysis by @sebpop: + // When diffing the assembly for src/stage1_find_marks.cpp I see that the eors are all spread out + // in between other vector code, so effectively the extra cycles of the sequence do not matter + // because the GPR units are idle otherwise and the critical path is on the FP side. + // Also the PMULL requires two extra fmovs: GPR->FP (3 cycles in N1, 5 cycles in A72 ) + // and FP->GPR (2 cycles on N1 and 5 cycles on A72.) + /////////// + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_BITMASK_H +/* end file simdjson/arm64/bitmask.h */ +/* including simdjson/arm64/numberparsing_defs.h: #include "simdjson/arm64/numberparsing_defs.h" */ +/* begin file simdjson/arm64/numberparsing_defs.h */ +#ifndef SIMDJSON_ARM64_NUMBERPARSING_DEFS_H +#define SIMDJSON_ARM64_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include <cstring> + +#if SIMDJSON_REGULAR_VISUAL_STUDIO && SIMDJSON_IS_ARM64 +// __umulh requires intrin.h +#include <intrin.h> +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO && SIMDJSON_IS_ARM64 + +namespace simdjson { +namespace arm64 { +namespace numberparsing { + +// we don't have SSE, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t
parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace arm64 +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_ARM64_NUMBERPARSING_DEFS_H +/* end file simdjson/arm64/numberparsing_defs.h */ +/* including simdjson/arm64/simd.h: #include "simdjson/arm64/simd.h" */ +/* begin file simdjson/arm64/simd.h */ +#ifndef SIMDJSON_ARM64_SIMD_H +#define SIMDJSON_ARM64_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace simd { + +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +namespace { +// Start of private section with Visual Studio workaround + + +#ifndef 
simdjson_make_uint8x16_t +#define simdjson_make_uint8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, \ + x13, x14, x15, x16) \ + ([=]() { \ + uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ + x9, x10, x11, x12, x13, x14, x15, x16}; \ + return vld1q_u8(array); \ + }()) +#endif +#ifndef simdjson_make_int8x16_t +#define simdjson_make_int8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, \ + x13, x14, x15, x16) \ + ([=]() { \ + int8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ + x9, x10, x11, x12, x13, x14, x15, x16}; \ + return vld1q_s8(array); \ + }()) +#endif + +#ifndef simdjson_make_uint8x8_t +#define simdjson_make_uint8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + uint8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1_u8(array); \ + }()) +#endif +#ifndef simdjson_make_int8x8_t +#define simdjson_make_int8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + int8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1_s8(array); \ + }()) +#endif +#ifndef simdjson_make_uint16x8_t +#define simdjson_make_uint16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + uint16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1q_u16(array); \ + }()) +#endif +#ifndef simdjson_make_int16x8_t +#define simdjson_make_int16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + int16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1q_s16(array); \ + }()) +#endif + +// End of private section with Visual Studio workaround +} // namespace +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO + + + template + struct simd8; + + // + // Base class of simd8 and simd8, both of which use uint8x16_t internally. 
+ // + template> + struct base_u8 { + uint8x16_t value; + static const int SIZE = sizeof(value); + + // Conversion from/to SIMD register + simdjson_inline base_u8(const uint8x16_t _value) : value(_value) {} + simdjson_inline operator const uint8x16_t&() const { return this->value; } + simdjson_inline operator uint8x16_t&() { return this->value; } + + // Bit operations (all of them act independently on each of the 16 bytes) + simdjson_inline simd8 operator|(const simd8 other) const { return vorrq_u8(*this, other); } + simdjson_inline simd8 operator&(const simd8 other) const { return vandq_u8(*this, other); } + simdjson_inline simd8 operator^(const simd8 other) const { return veorq_u8(*this, other); } + /* *this & ~other */ simdjson_inline simd8 bit_andnot(const simd8 other) const { return vbicq_u8(*this, other); } + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + /* The compound assignments go through the derived type so that they return a reference to it rather than to base_u8. */ simdjson_inline simd8& operator|=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline simd8& operator&=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline simd8& operator^=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast ^ other; return *this_cast; } + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return vceqq_u8(lhs, rhs); } + + /* Returns the last N bytes of prev_chunk followed by the first 16 - N bytes of *this. */ template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return vextq_u8(prev_chunk, *this, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base_u8 { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + /* true -> 0xFF in every byte, false -> 0x00 in every byte */ static simdjson_inline simd8 splat(bool _value) { return vmovq_n_u8(uint8_t(-(!!_value))); } + + simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} + // False constructor + simdjson_inline simd8() : simd8(vdupq_n_u8(0)) {} + // Splat constructor + simdjson_inline simd8(bool _value) : simd8(splat(_value)) {} + + //
We return uint32_t instead of uint16_t because that seems to be more efficient for most + // purposes (cutting it down to uint16_t costs performance in some compilers). + simdjson_inline uint32_t to_bitmask() const { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + const uint8x16_t bit_mask = simdjson_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); +#else + const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; +#endif + /* keep only bit (i mod 8) of byte i */ auto minput = *this & bit_mask; + /* three pairwise-add rounds fold each group of 8 bytes into one byte; the two result bytes sit in 16-bit lane 0 */ uint8x16_t tmp = vpaddq_u8(minput, minput); + tmp = vpaddq_u8(tmp, tmp); + tmp = vpaddq_u8(tmp, tmp); + return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); + } + /* true if any bit of the register is set */ simdjson_inline bool any() const { return vmaxvq_u32(vreinterpretq_u32_u8(*this)) != 0; } + }; + + // Unsigned bytes + template<> + struct simd8: base_u8 { + static simdjson_inline uint8x16_t splat(uint8_t _value) { return vmovq_n_u8(_value); } + static simdjson_inline uint8x16_t zero() { return vdupq_n_u8(0); } + static simdjson_inline uint8x16_t load(const uint8_t* values) { return vld1q_u8(values); } + + simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} + // Zero constructor + simdjson_inline simd8() : simd8(zero()) {} + // Array constructor (loads 16 bytes starting at values) + simdjson_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + // Splat constructor (copies _value into every byte) + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Member-by-member initialization (Visual Studio goes through the simdjson_make_uint8x16_t helper) +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(simdjson_make_uint8x16_t( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} +#else + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t
v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(uint8x16_t{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} +#endif + + // Repeat 16 values as many times as necessary (usually for lookup tables); this register holds exactly 16 bytes, so the values are used once + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Store to array (writes 16 bytes to dst) + simdjson_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); } + + // Saturated math (results are clamped to the uint8_t range instead of wrapping around) + simdjson_inline simd8 saturating_add(const simd8 other) const { return vqaddq_u8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return vqsubq_u8(*this, other); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_u8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_u8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } + + // Order-specific operations (the overloads without argument reduce across all 16 bytes, the others work byte by byte) + simdjson_inline uint8_t max_val() const { return vmaxvq_u8(*this); } + simdjson_inline uint8_t min_val() const { return vminvq_u8(*this); } + simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_u8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return vminq_u8(*this, other); } + simdjson_inline simd8 operator<=(const simd8 other) const { return vcleq_u8(*this, other); } + simdjson_inline simd8 operator>=(const simd8 other) const { return vcgeq_u8(*this, other); } +
simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_u8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_u8(*this, other); } + // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. + simdjson_inline simd8 gt_bits(const simd8 other) const { return simd8(*this > other); } + // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. + simdjson_inline simd8 lt_bits(const simd8 other) const { return simd8(*this < other); } + + // Bit-specific operations + /* per byte: true where (*this & bits) is non-zero */ simdjson_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); } + /* true if any byte of the register is non-zero */ simdjson_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); } + /* per-byte shift right by N bits */ template + simdjson_inline simd8 shr() const { return vshrq_n_u8(*this, N); } + /* per-byte shift left by N bits */ template + simdjson_inline simd8 shl() const { return vshlq_n_u8(*this, N); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + + + // Copies to 'output' all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint16_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult.
+ template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + uint64x2_t shufmask64 = {thintable_epi8[mask1], thintable_epi8[mask2]}; + uint8x16_t shufmask = vreinterpretq_u8_u64(shufmask64); + // we increment by 0x08 the second half of the mask so that its indices select from bytes 8..15 of the register +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + uint8x16_t inc = simdjson_make_uint8x16_t(0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); +#else + uint8x16_t inc = {0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; +#endif + shufmask = vaddq_u8(shufmask, inc); + // this is the version "nearly pruned": each 8-byte half has been compacted on its own + uint8x16_t pruned = vqtbl1q_u8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half + // (NOTE(review): the table name BitsSetTable256mul2 suggests the stored value is twice the bit count; confirm against the table definition): + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + uint8x16_t compactmask = vld1q_u8(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + uint8x16_t answer = vqtbl1q_u8(pruned, compactmask); /* second shuffle: joins the two compacted halves */ + vst1q_u8(reinterpret_cast(output), answer); /* always stores a full 16 bytes to output */ + } + + // Copies all bytes corresponding to a 0 in the low half of the mask (interpreted as a + // bitset) to output1, then those corresponding to a 0 in the high half to output2.
+ template + simdjson_inline void compress_halves(uint16_t mask, L *output1, L *output2) const { + using internal::thintable_epi8; + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + uint8x8_t compactmask1 = vcreate_u8(thintable_epi8[mask1]); + uint8x8_t compactmask2 = vcreate_u8(thintable_epi8[mask2]); + // we increment by 0x08 the second half of the mask so that its indices select from bytes 8..15 of the register +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + uint8x8_t inc = simdjson_make_uint8x8_t(0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); +#else + uint8x8_t inc = {0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; +#endif + compactmask2 = vadd_u8(compactmask2, inc); + // store each result (with the second store possibly overlapping the first); each store writes 8 bytes + vst1_u8((uint8_t*)output1, vqtbl1_u8(*this, compactmask1)); + vst1_u8((uint8_t*)output2, vqtbl1_u8(*this, compactmask2)); + } + + /* Convenience overload: builds the 16-entry lookup table from the individual replace values. */ template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + + /* Uses *this as the 16-byte table and the bytes of original as the indices into it. */ template + simdjson_inline simd8 apply_lookup_16_to(const simd8 original) { + return vqtbl1q_u8(*this, simd8(original)); + } + }; + + // Signed bytes + template<> + struct simd8 { + int8x16_t value; + + static simdjson_inline simd8 splat(int8_t _value) { return vmovq_n_s8(_value); } + static simdjson_inline simd8 zero() { return vdupq_n_s8(0); } + static simdjson_inline simd8 load(const int8_t values[16]) { return vld1q_s8(values); } + + // Conversion from/to SIMD register + simdjson_inline simd8(const int8x16_t _value) : value{_value} {} + simdjson_inline operator const int8x16_t&() const { return
this->value; } + simdjson_inline operator int8x16_t&() { return this->value; } + + // Zero constructor + simdjson_inline simd8() : simd8(zero()) {} + // Splat constructor (copies _value into every byte) + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor (loads 16 bytes starting at values) + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization (Visual Studio goes through the simdjson_make_int8x16_t helper) +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(simdjson_make_int8x16_t( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} +#else + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(int8x16_t{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} +#endif + // Repeat 16 values as many times as necessary (usually for lookup tables); this register holds exactly 16 bytes, so the values are used once + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Store to array (writes 16 bytes to dst) + simdjson_inline void store(int8_t dst[16]) const { return vst1q_s8(dst, *this); } + + // Explicit conversion to/from unsigned + // + // Under Visual Studio/ARM64 uint8x16_t and int8x16_t are apparently the same type. + // In theory, we could check this occurrence with std::same_as and std::enable_if but it is C++14 + // and relatively ugly and hard to read.
+#ifndef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline explicit simd8(const uint8x16_t other): simd8(vreinterpretq_s8_u8(other)) {} +#endif + simdjson_inline explicit operator simd8() const { return vreinterpretq_u8_s8(this->value); } + + // Math (per byte) + simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_s8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_s8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } + + // Order-sensitive comparisons (signed, per byte) + simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_s8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return vminq_s8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_s8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_s8(*this, other); } + simdjson_inline simd8 operator==(const simd8 other) const { return vceqq_s8(*this, other); } + + /* Returns the last N bytes of prev_chunk followed by the first 16 - N bytes of *this. */ template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return vextq_s8(prev_chunk, *this, 16 - N); + } + + // Perform a lookup assuming no value is larger than 16 + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + /* Convenience overload: builds the 16-entry lookup table from the individual replace values. */ template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + + /* Uses *this as the 16-byte table and the bytes of original as the indices into it. */ template + simdjson_inline simd8 apply_lookup_16_to(const simd8 original)
{ + return vqtbl1q_s8(*this, simd8(original)); + } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "ARM kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + /* Loads 64 consecutive elements from ptr as four 16-byte chunks. */ simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + /* Writes the four chunks back to ptr, one after another. */ simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + /* Bitwise OR of the four chunks. */ simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + + /* Writes the bytes whose mask bit is 0 to output, chunk half by chunk half, and returns how many bytes were kept. */ simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + /* byte k = number of zero bits in byte k of mask, i.e. bytes kept by the k-th 8-byte half */ uint64_t popcounts = vget_lane_u64(vreinterpret_u64_u8(vcnt_u8(vcreate_u8(~mask))), 0); + // compute the prefix sum of the popcounts of each byte (byte k of offsets = bytes kept by halves 0..k) + uint64_t offsets = popcounts * 0x0101010101010101; + this->chunks[0].compress_halves(uint16_t(mask), output, &output[popcounts & 0xFF]); + this->chunks[1].compress_halves(uint16_t(mask >> 16), &output[(offsets >> 8) & 0xFF], &output[(offsets >> 16) & 0xFF]); + this->chunks[2].compress_halves(uint16_t(mask >> 32), &output[(offsets >> 24) & 0xFF], &output[(offsets >> 32) & 0xFF]); + this->chunks[3].compress_halves(uint16_t(mask >> 48), &output[(offsets >> 40) & 0xFF], &output[(offsets >> 48) & 0xFF]); + return offsets >> 56; /* top byte of the prefix sum = total number of bytes kept */ + } + + simdjson_inline uint64_t to_bitmask() const { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + const uint8x16_t
bit_mask = simdjson_make_uint8x16_t( + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + ); +#else + const uint8x16_t bit_mask = { + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + }; +#endif + // Add each of the elements next to each other, successively, to stuff each 8 byte mask into one. + uint8x16_t sum0 = vpaddq_u8(this->chunks[0] & bit_mask, this->chunks[1] & bit_mask); + uint8x16_t sum1 = vpaddq_u8(this->chunks[2] & bit_mask, this->chunks[3] & bit_mask); + sum0 = vpaddq_u8(sum0, sum1); + sum0 = vpaddq_u8(sum0, sum0); + return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); /* the low 64-bit lane holds the eight folded mask bytes */ + } + + /* 64-bit mask with one bit per byte of the block (bit i for byte i), set where the byte equals m. */ simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + /* 64-bit mask with one bit per byte of the block (bit i for byte i), set where the byte is <= m. */ simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_SIMD_H +/* end file simdjson/arm64/simd.h */ +/* including simdjson/arm64/stringparsing_defs.h: #include "simdjson/arm64/stringparsing_defs.h" */ +/* begin file simdjson/arm64/stringparsing_defs.h */ +#ifndef SIMDJSON_ARM64_STRINGPARSING_DEFS_H +#define SIMDJSON_ARM64_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +
+namespace simdjson { +namespace arm64 { +namespace { + +using namespace simd; + +// Holds backslash and quote locations for one 32-byte window: bit i of each mask corresponds to byte i of the window. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + /* True when a quote occurs before the first backslash, or when there is a quote and no backslash at all: bs_bits - 1 sets every bit below the lowest backslash bit. */ simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +/* Copies BYTES_PROCESSED (32) bytes from src to dst and reports where backslashes and quotes occur among them. */ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on ARM; therefore, we + // smash them together into a 64-byte mask and get the bitmask from there (low 32 bits: backslashes, high 32 bits: quotes).
+ uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_STRINGPARSING_DEFS_H +/* end file simdjson/arm64/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/arm64/begin.h */ +/* including simdjson/generic/amalgamated.h for arm64: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for arm64 */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for arm64: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for arm64 */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested.
*/ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." 
*/ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number; the enumerator values start at 1 and increase in declaration order. + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer, /// a positive integer larger than or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word +}; + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for arm64 */ +/* including simdjson/generic/jsoncharutils.h for arm64: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for arm64 */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise (implemented as a 256-entry table lookup indexed by the byte) +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +/* Table lookup mirroring the function above; presumably non-zero when c is a structural or whitespace char (verify against internal::structural_or_whitespace). */ simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +//
otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + /* the offsets 630/420/210/0 select a separate region of the table for each digit position; the four lookups are OR-ed together */ uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// Given a code point cp, writes its UTF-8 encoding to c +// and returns the length of that encoding in +// bytes (1 to 4). If the returned length is zero, the code point +// is invalid (larger than 0x10FFFF) and nothing was written. +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; /* three-byte sequence */ + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; /* four-byte sequence */ + } + // will return 0 when the code point was too large.
+ return 0; // code point above 0x10FFFF: nothing was written to c +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit: _umul128 assembles the 64x64 -> 128-bit product from four 32x32 -> 64-bit +// partial products (computed by __emulu), returns the low 64 bits and stores the high 64 bits in *hi +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); /* 1 if the sum of the two cross products wrapped around */ + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); /* the last term is the carry out of the low word */ + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for arm64 */ +/* including simdjson/generic/atomparsing.h for arm64: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace arm64 { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong.
+// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len 
> 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for arm64 */ +/* including simdjson/generic/dom_parser_implementation.h for arm64: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for arm64 */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { + +// expectation: sizeof(open_container) = 64/8. 
+struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace arm64 +} // namespace 
simdjson + +namespace simdjson { +namespace arm64 { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for arm64 */ +/* including simdjson/generic/implementation_simdjson_result_base.h for arm64: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for arm64 */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* 
amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). 
+ */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. 
+ */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for arm64 */ +/* including simdjson/generic/numberparsing.h for arm64: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for arm64 */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace arm64 { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define 
WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. 
+ // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. 
+ // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. 
+ simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. 
+ // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without + // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product + // is sufficiently accurate, and more computation is not needed. + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. 
+ // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. 
+ if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // 
might overflow, we will handle the overflow later + return true; +} + +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. 
+ + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? 
-exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { + if (parse_float_fallback(src, answer)) { + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! 
+ if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer + // reference to it, it would force it to be stored in memory, preventing the compiler from + // picking it apart and putting into registers. i.e. if we pass it as reference, + // it gets slow. + double d; + error_code error = slow_float_parsing(src, &d); + writer.append_double(d); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. 
+ return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; 
} +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' 
== *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. 
+ // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it does not fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. 
+ if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. 
+ // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. 
+ if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. 
+ if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } + // If the number is negative and valid, it must be a signed integer. + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). 
+ static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 (never reading at or past p, which may be src_end) and see if the remaining digits still overflow + const uint8_t *start_digits = src + 2; + while ((start_digits != p) && (*start_digits == '0')) { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. 
+ // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for arm64 */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for arm64: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // 
SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace arm64 {
+
+//
+// internal::implementation_simdjson_result_base<T> inline implementation
+//
+
+// Copies the stored error into `error`; the value is moved into `value` only
+// when the error code is falsy (success).
+template<typename T>
+simdjson_inline void implementation_simdjson_result_base<T>::tie(T &value, error_code &error) && noexcept {
+  error = this->second;
+  if (!error) {
+    value = std::forward<implementation_simdjson_result_base<T>>(*this).first;
+  }
+}
+
+// Same as tie(), but returns the error code instead of writing it to an
+// out-parameter.
+template<typename T>
+simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base<T>::get(T &value) && noexcept {
+  error_code error;
+  std::forward<implementation_simdjson_result_base<T>>(*this).tie(value, error);
+  return error;
+}
+
+// Returns the stored error code.
+template<typename T>
+simdjson_inline error_code implementation_simdjson_result_base<T>::error() const noexcept {
+  return this->second;
+}
+
+#if SIMDJSON_EXCEPTIONS
+
+// Returns a reference to the value; throws simdjson_error when an error is stored.
+template<typename T>
+simdjson_inline T& implementation_simdjson_result_base<T>::value() & noexcept(false) {
+  if (error()) { throw simdjson_error(error()); }
+  return this->first;
+}
+
+// Rvalue overload: delegates to take_value().
+template<typename T>
+simdjson_inline T&& implementation_simdjson_result_base<T>::value() && noexcept(false) {
+  return std::forward<implementation_simdjson_result_base<T>>(*this).take_value();
+}
+
+// Hands the value out as an rvalue; throws simdjson_error when an error is stored.
+template<typename T>
+simdjson_inline T&& implementation_simdjson_result_base<T>::take_value() && noexcept(false) {
+  if (error()) { throw simdjson_error(error()); }
+  return std::forward<T>(this->first);
+}
+
+// Conversion to T&&: delegates to take_value(), so it throws on error.
+template<typename T>
+simdjson_inline implementation_simdjson_result_base<T>::operator T&&() && noexcept(false) {
+  return std::forward<implementation_simdjson_result_base<T>>(*this).take_value();
+}
+
+#endif // SIMDJSON_EXCEPTIONS
+
+// The value_unsafe() overloads return the stored value without looking at the
+// error code.
+template<typename T>
+simdjson_inline const T& implementation_simdjson_result_base<T>::value_unsafe() const& noexcept {
+  return this->first;
+}
+
+template<typename T>
+simdjson_inline T& implementation_simdjson_result_base<T>::value_unsafe() & noexcept {
+  return this->first;
+}
+
+template<typename T>
+simdjson_inline T&& implementation_simdjson_result_base<T>::value_unsafe() && noexcept {
+  return std::forward<T>(this->first);
+}
+
+// Stores both the value and the error code as given.
+template<typename T>
+simdjson_inline implementation_simdjson_result_base<T>::implementation_simdjson_result_base(T &&value, error_code error) noexcept
+    : first{std::forward<T>(value)}, second{error} {}
+// Error-only result: the value slot holds a value-initialized T.
+template<typename T>
+simdjson_inline implementation_simdjson_result_base<T>::implementation_simdjson_result_base(error_code error) noexcept
+    : implementation_simdjson_result_base(T{}, error) {}
+// Value-only result: the error code is SUCCESS.
+template<typename T>
+simdjson_inline implementation_simdjson_result_base<T>::implementation_simdjson_result_base(T &&value) noexcept
+    : implementation_simdjson_result_base(std::forward<T>(value), SUCCESS) {}
+
+} // namespace arm64
+} // namespace simdjson
+
+#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H
+/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for arm64 */
+/* end file simdjson/generic/amalgamated.h for arm64 */
+/* including simdjson/arm64/end.h: #include "simdjson/arm64/end.h" */
+/* begin file simdjson/arm64/end.h */
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT
+/* undefining SIMDJSON_IMPLEMENTATION from "arm64" */
+#undef SIMDJSON_IMPLEMENTATION
+/* end file simdjson/arm64/end.h */
+
+#endif // SIMDJSON_ARM64_H
+/* end file simdjson/arm64.h */
+#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(fallback)
+/* including simdjson/fallback.h: #include "simdjson/fallback.h" */
+/* begin file simdjson/fallback.h */
+#ifndef SIMDJSON_FALLBACK_H
+#define SIMDJSON_FALLBACK_H
+
+/* including simdjson/fallback/begin.h: #include "simdjson/fallback/begin.h" */
+/* begin file simdjson/fallback/begin.h */
+/* defining SIMDJSON_IMPLEMENTATION to "fallback" */
+#define SIMDJSON_IMPLEMENTATION fallback
+/* including simdjson/fallback/base.h: #include "simdjson/fallback/base.h" */
+/* begin file simdjson/fallback/base.h */
+#ifndef SIMDJSON_FALLBACK_BASE_H
+#define SIMDJSON_FALLBACK_BASE_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #include "simdjson/base.h"
*/
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+/**
+ * Fallback implementation (runs on any machine).
+ */
+namespace fallback {
+
+class implementation;
+
+} // namespace fallback
+} // namespace simdjson
+
+#endif // SIMDJSON_FALLBACK_BASE_H
+/* end file simdjson/fallback/base.h */
+/* including simdjson/fallback/bitmanipulation.h: #include "simdjson/fallback/bitmanipulation.h" */
+/* begin file simdjson/fallback/bitmanipulation.h */
+#ifndef SIMDJSON_FALLBACK_BITMANIPULATION_H
+#define SIMDJSON_FALLBACK_BITMANIPULATION_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace fallback {
+namespace {
+
+#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64)
+// 64-bit bit scans built from the 32-bit intrinsics for MSVC targets that are
+// neither x64 nor ARM64. Both return non-zero when x has at least one set bit.
+static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) {
+  unsigned long x0 = (unsigned long)x, top, bottom;
+  _BitScanForward(&top, (unsigned long)(x >> 32));
+  _BitScanForward(&bottom, x0);
+  // Prefer the low half; fall back to the high half shifted up by 32.
+  *ret = x0 ? bottom : 32 + top;
+  return x != 0;
+}
+static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) {
+  unsigned long x1 = (unsigned long)(x >> 32), top, bottom;
+  _BitScanReverse(&top, x1);
+  _BitScanReverse(&bottom, (unsigned long)x);
+  // Prefer the high half; fall back to the low half.
+  *ret = x1 ? top + 32 : bottom;
+  return x != 0;
+}
+#endif
+
+/* result might be undefined when input_num is zero */
+simdjson_inline int leading_zeroes(uint64_t input_num) {
+#ifdef _MSC_VER
+  unsigned long leading_zero = 0;
+  // Search the mask data from most significant bit (MSB)
+  // to least significant bit (LSB) for a set bit (1).
+  if (_BitScanReverse64(&leading_zero, input_num))
+    return (int)(63 - leading_zero);
+  else
+    return 64;
+#else
+  return __builtin_clzll(input_num);
+#endif// _MSC_VER
+}
+
+} // unnamed namespace
+} // namespace fallback
+} // namespace simdjson
+
+#endif // SIMDJSON_FALLBACK_BITMANIPULATION_H
+/* end file simdjson/fallback/bitmanipulation.h */
+/* including simdjson/fallback/stringparsing_defs.h: #include "simdjson/fallback/stringparsing_defs.h" */
+/* begin file simdjson/fallback/stringparsing_defs.h */
+#ifndef SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H
+#define SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace fallback {
+namespace {
+
+// Holds backslashes and quotes locations.
+// The fallback processes one byte at a time (BYTES_PROCESSED == 1), so the
+// "locations" reduce to whether the single copied byte c is '"' or '\\'.
+struct backslash_and_quote {
+public:
+  static constexpr uint32_t BYTES_PROCESSED = 1;
+  simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst);
+
+  simdjson_inline bool has_quote_first() { return c == '"'; }
+  simdjson_inline bool has_backslash() { return c == '\\'; }
+  // Index 0 when the byte matches, 1 (one past the processed byte) otherwise.
+  simdjson_inline int quote_index() { return c == '"' ? 0 : 1; }
+  simdjson_inline int backslash_index() { return c == '\\' ? 0 : 1; }
+
+  uint8_t c;
+}; // struct backslash_and_quote
+
+// Copies one byte from src to dst and records it for the predicates above.
+simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) {
+  // store to dest unconditionally - we can overwrite the bits we don't like later
+  dst[0] = src[0];
+  return { src[0] };
+}
+
+} // unnamed namespace
+} // namespace fallback
+} // namespace simdjson
+
+#endif // SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H
+/* end file simdjson/fallback/stringparsing_defs.h */
+/* including simdjson/fallback/numberparsing_defs.h: #include "simdjson/fallback/numberparsing_defs.h" */
+/* begin file simdjson/fallback/numberparsing_defs.h */
+#ifndef SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H
+#define SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+#include <cstring>
+
+#ifdef JSON_TEST_NUMBERS // for unit testing
+void found_invalid_number(const uint8_t *buf);
+void found_integer(int64_t result, const uint8_t *buf);
+void found_unsigned_integer(uint64_t result, const uint8_t *buf);
+void found_float(double result, const uint8_t *buf);
+#endif
+
+namespace simdjson {
+namespace fallback {
+namespace numberparsing {
+
+// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
+// Loads eight bytes with memcpy and combines them with three mask/multiply/shift
+// steps. NOTE(review): the byte-lane arithmetic presumably assumes eight ASCII
+// digits and a little-endian load - confirm with the callers.
+/** @private */
+static simdjson_inline uint32_t parse_eight_digits_unrolled(const char *chars) {
+  uint64_t val;
+  memcpy(&val, chars, sizeof(uint64_t));
+  val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8;
+  val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
+  return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32);
+}
+
+/** @private */
+static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) {
+  return parse_eight_digits_unrolled(reinterpret_cast<const char *>(chars));
+}
+
+#if SIMDJSON_IS_32BITS // _umul128 for x86, arm
+// this is a slow emulation routine for 32-bit
+//
+static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) {
+  return x * (uint64_t)y;
+}
+// 64x64 -> 128-bit multiply from four 32x32 partial products: returns the low
+// 64 bits and stores the high 64 bits in *hi.
+static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) {
+  uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd);
+  uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd);
+  uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32));
+  uint64_t adbc_carry = !!(adbc < ad); // carry out of the cross-term sum
+  uint64_t lo = bd + (adbc << 32);
+  *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) +
+        (adbc_carry << 32) + !!(lo < bd);
+  return lo;
+}
+#endif
+
+// Full 64x64 -> 128-bit product of value1 and value2, returned as low/high halves.
+/** @private */
+simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) {
+  internal::value128 answer;
+#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS
+#if SIMDJSON_IS_ARM64
+  // ARM64 has native support for 64-bit multiplications, no need to emulate
+  answer.high = __umulh(value1, value2);
+  answer.low = value1 * value2;
+#else
+  answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64
+#endif // SIMDJSON_IS_ARM64
+#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS
+  __uint128_t r = (static_cast<__uint128_t>(value1)) * value2;
+  answer.low = uint64_t(r);
+  answer.high = uint64_t(r >> 64);
+#endif
+  return answer;
+}
+
+} // namespace numberparsing
+} // namespace fallback
+} // namespace simdjson
+
+#define SIMDJSON_SWAR_NUMBER_PARSING 1
+
+#endif // SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H
+/* end file simdjson/fallback/numberparsing_defs.h */
+/* end file simdjson/fallback/begin.h */
+/* including simdjson/generic/amalgamated.h for fallback: #include "simdjson/generic/amalgamated.h" */
+/* begin file simdjson/generic/amalgamated.h for fallback */
+#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H)
+#error
simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h!
+#endif
+
+/* including simdjson/generic/base.h for fallback: #include "simdjson/generic/base.h" */
+/* begin file simdjson/generic/base.h for fallback */
+#ifndef SIMDJSON_GENERIC_BASE_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */
+/* amalgamation skipped (editor-only): #include "simdjson/base.h" */
+/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */
+/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */
+/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */
+/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */
+/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */
+/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */
+/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */
+/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */
+/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */
+/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */
+/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */
+/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */
+/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */
+/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */
+/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */
+/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */
+/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */
+/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */
+/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */
+/* amalgamation skipped (editor-only): #else */
+/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */
+/* amalgamation skipped (editor-only): #endif */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace fallback {
+
+// Forward declarations; both types are defined further down in this header.
+struct open_container;
+class dom_parser_implementation;
+
+/**
+ * The type of a JSON number
+ *
+ * Enumerators start at 1, so a zero-initialized value matches none of them.
+ */
+enum class number_type {
+    floating_point_number=1, ///< a binary64 number
+    signed_integer,          ///< a signed integer that fits in a 64-bit word using two's complement
+    unsigned_integer,        ///< a positive integer larger or equal to 1<<63
+    big_integer              ///< a big integer that does not fit in a 64-bit word
+};
+
+} // namespace fallback
+} // namespace simdjson
+
+#endif // SIMDJSON_GENERIC_BASE_H
+/* end file simdjson/generic/base.h for fallback */
+/* including simdjson/generic/jsoncharutils.h for fallback: #include "simdjson/generic/jsoncharutils.h" */
+/* begin file simdjson/generic/jsoncharutils.h for fallback */
+#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace fallback {
+namespace {
+namespace jsoncharutils
{
+
+// return non-zero if not a structural or whitespace char
+// zero otherwise
+simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) {
+  return internal::structural_or_whitespace_negated[c];
+}
+
+// Table lookup that is the counterpart of is_not_structural_or_whitespace.
+simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) {
+  return internal::structural_or_whitespace[c];
+}
+
+// returns a value with the high 16 bits set if not valid
+// otherwise returns the conversion of the 4 hex digits at src into the bottom
+// 16 bits of the 32-bit return register
+//
+// see
+// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/
+static inline uint32_t hex_to_u32_nocheck(
+    const uint8_t *src) { // strictly speaking, static inline is a C-ism
+  // One table section per digit position (offsets 630, 420, 210, 0); the four
+  // lookups are OR-ed together.
+  uint32_t v1 = internal::digit_to_val32[630 + src[0]];
+  uint32_t v2 = internal::digit_to_val32[420 + src[1]];
+  uint32_t v3 = internal::digit_to_val32[210 + src[2]];
+  uint32_t v4 = internal::digit_to_val32[0 + src[3]];
+  return v1 | v2 | v3 | v4;
+}
+
+// given a code point cp, writes to c
+// the utf-8 code, outputting the length in
+// bytes, if the length is zero, the code point
+// is invalid
+//
+// This can possibly be made faster using pdep
+// and clz and table lookups, but JSON documents
+// have few escaped code points, and the following
+// function looks cheap.
+//
+// Note: we assume that surrogates are treated separately
+//
+simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
+  if (cp <= 0x7F) {
+    c[0] = uint8_t(cp);
+    return 1; // ascii
+  }
+  if (cp <= 0x7FF) {
+    c[0] = uint8_t((cp >> 6) + 192);
+    c[1] = uint8_t((cp & 63) + 128);
+    return 2; // universal plane
+    //  Surrogates are treated elsewhere...
+    //} //else if (0xd800 <= cp && cp <= 0xdfff) {
+    //  return 0; // surrogates // could put assert here
+  } else if (cp <= 0xFFFF) {
+    c[0] = uint8_t((cp >> 12) + 224);
+    c[1] = uint8_t(((cp >> 6) & 63) + 128);
+    c[2] = uint8_t((cp & 63) + 128);
+    return 3;
+  } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this
+                               // is not needed
+    c[0] = uint8_t((cp >> 18) + 240);
+    c[1] = uint8_t(((cp >> 12) & 63) + 128);
+    c[2] = uint8_t(((cp >> 6) & 63) + 128);
+    c[3] = uint8_t((cp & 63) + 128);
+    return 4;
+  }
+  // will return 0 when the code point was too large.
+  return 0; // invalid code point (larger than 0x10FFFF)
+}
+
+#if SIMDJSON_IS_32BITS // _umul128 for x86, arm
+// this is a slow emulation routine for 32-bit
+//
+static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) {
+  return x * (uint64_t)y;
+}
+// 64x64 -> 128-bit multiply from four 32x32 partial products: returns the low
+// 64 bits and stores the high 64 bits in *hi.
+static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) {
+  uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd);
+  uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd);
+  uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32));
+  uint64_t adbc_carry = !!(adbc < ad); // carry out of the cross-term sum
+  uint64_t lo = bd + (adbc << 32);
+  *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) +
+        (adbc_carry << 32) + !!(lo < bd);
+  return lo;
+}
+#endif
+
+} // namespace jsoncharutils
+} // unnamed namespace
+} // namespace fallback
+} // namespace simdjson
+
+#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H
+/* end file simdjson/generic/jsoncharutils.h for fallback */
+/* including simdjson/generic/atomparsing.h for fallback: #include "simdjson/generic/atomparsing.h" */
+/* begin file simdjson/generic/atomparsing.h for fallback */
+#ifndef SIMDJSON_GENERIC_ATOMPARSING_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+#include <cstring>
+
+namespace simdjson {
+namespace fallback {
+namespace {
+/// @private
+namespace atomparsing {
+
+// The string_to_uint32 is exclusively used to map literal strings to 32-bit values.
+// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot
+// be certain that the character pointer will be properly aligned.
+// You might think that using memcpy makes this function expensive, but you'd be wrong.
+// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false");
+// to the compile-time constant 1936482662.
+simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; }
+
+
+// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive.
+// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about.
+// XORs the four bytes at src with the four bytes of atom; the result is zero
+// exactly when the two 4-byte sequences are identical.
+simdjson_warn_unused
+simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) {
+  static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes");
+  // memcpy rather than a cast: unaligned 32-bit loads are undefined in C/C++.
+  uint32_t loaded;
+  std::memcpy(&loaded, src, sizeof(uint32_t));
+  const uint32_t expected = string_to_uint32(atom);
+  return loaded ^ expected;
+}
+
+// True when src starts with "true" and the byte after it is structural or
+// whitespace. Reads src[4].
+simdjson_warn_unused
+simdjson_inline bool is_valid_true_atom(const uint8_t *src) {
+  const uint32_t mismatch = str4ncmp(src, "true");
+  const uint32_t bad_terminator = jsoncharutils::is_not_structural_or_whitespace(src[4]);
+  return (mismatch | bad_terminator) == 0;
+}
+
+// Length-aware variant: the terminator is only inspected when len > 4.
+simdjson_warn_unused
+simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) {
+  if (len < 4) { return false; }
+  if (len == 4) { return !str4ncmp(src, "true"); }
+  return is_valid_true_atom(src);
+}
+
+// True when src[1..4] is "alse" and src[5] is structural or whitespace.
+// src[0] is not examined here.
+simdjson_warn_unused
+simdjson_inline bool is_valid_false_atom(const uint8_t *src) {
+  const uint32_t mismatch = str4ncmp(src+1, "alse");
+  const uint32_t bad_terminator = jsoncharutils::is_not_structural_or_whitespace(src[5]);
+  return (mismatch | bad_terminator) == 0;
+}
+
+// Length-aware variant: the terminator is only inspected when len > 5.
+simdjson_warn_unused
+simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) {
+  if (len < 5) { return false; }
+  if (len == 5) { return !str4ncmp(src+1, "alse"); }
+  return is_valid_false_atom(src);
+}
+
+// True when src starts with "null" and the byte after it is structural or
+// whitespace. Reads src[4].
+simdjson_warn_unused
+simdjson_inline bool is_valid_null_atom(const uint8_t *src) {
+  const uint32_t mismatch = str4ncmp(src, "null");
+  const uint32_t bad_terminator = jsoncharutils::is_not_structural_or_whitespace(src[4]);
+  return (mismatch | bad_terminator) == 0;
+}
+
+// Length-aware variant: the terminator is only inspected when len > 4.
+simdjson_warn_unused
+simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) {
+  if (len < 4) { return false; }
+  if (len == 4) { return !str4ncmp(src, "null"); }
+  return is_valid_null_atom(src);
+}
+
+} // namespace atomparsing
+} // unnamed namespace
+} // namespace fallback
+} // namespace simdjson
+
+#endif // SIMDJSON_GENERIC_ATOMPARSING_H
+/* end file simdjson/generic/atomparsing.h for fallback */
+/* including simdjson/generic/dom_parser_implementation.h for fallback: #include "simdjson/generic/dom_parser_implementation.h" */
+/* begin file
simdjson/generic/dom_parser_implementation.h for fallback */
+#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace fallback {
+
+// expectation: sizeof(open_container) = 64/8.
+struct open_container {
+  uint32_t tape_index; // where, on the tape, does the scope ([,{) begins
+  uint32_t count; // how many elements in the scope
+}; // struct open_container
+
+static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits");
+
+// Parser state for this implementation: the stage 2 scope stacks plus the
+// buffer and document handed to stage 1 / stage 2. Move-only.
+class dom_parser_implementation final : public internal::dom_parser_implementation {
+public:
+  /** Tape location of each open { or [ */
+  std::unique_ptr<open_container[]> open_containers{};
+  /** Whether each open container is a [ or { */
+  std::unique_ptr<bool[]> is_array{};
+  /** Buffer passed to stage 1 */
+  const uint8_t *buf{};
+  /** Length passed to stage 1 */
+  size_t len{0};
+  /** Document passed to stage 2 */
+  dom::document *doc{};
+
+  inline dom_parser_implementation() noexcept;
+  inline dom_parser_implementation(dom_parser_implementation &&other) noexcept;
+  inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept;
+  dom_parser_implementation(const dom_parser_implementation &) = delete;
+  dom_parser_implementation &operator=(const dom_parser_implementation &) = delete;
+
+  simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final;
+  simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final;
+  simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final;
+  simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final;
+  simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final;
+  simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final;
+  inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final;
+  inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final;
+private:
+  simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity);
+
+};
+
+} // namespace fallback
+} // namespace simdjson
+
+namespace simdjson {
+namespace fallback {
+
+inline dom_parser_implementation::dom_parser_implementation() noexcept = default;
+inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default;
+inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default;
+
+// Leaving these here so they can be inlined if so desired
+// Allocates the stage 1 structural index buffer for documents of up to
+// `capacity` bytes. Returns CAPACITY when capacity exceeds
+// SIMDJSON_MAXSIZE_BYTES and MEMALLOC (with _capacity reset to 0) when the
+// allocation fails.
+inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept {
+  if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; }
+  // Stage 1 index output
+  size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7;
+  structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] );
+  if (!structural_indexes) { _capacity = 0; return MEMALLOC; }
+  structural_indexes[0] = 0;
+  n_structural_indexes = 0;
+
+  _capacity = capacity;
+  return SUCCESS;
+}
+
+// Allocates the two stage 2 stacks with max_depth entries each. Returns
+// MEMALLOC (with _max_depth reset to 0) when either allocation fails.
+inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept {
+  // Stage 2 stacks
+  open_containers.reset(new (std::nothrow) open_container[max_depth]);
+  is_array.reset(new (std::nothrow) bool[max_depth]);
+  if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; }
+
+  _max_depth = max_depth;
+  return SUCCESS;
+}
+
+} //
namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for fallback */ +/* including simdjson/generic/implementation_simdjson_result_base.h for fallback: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for fallback */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. 
+ */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. 
This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for fallback */ +/* including simdjson/generic/numberparsing.h for fallback: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for fallback */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace fallback { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) 
(found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." 
+#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. 
Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. 
+ const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. 
Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without + // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product + // is sufficiently accurate, and more computation is not needed. + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? 
-0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. 
+ // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. 
+ + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! 
+ // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! 
+ if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? 
-exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { + if (parse_float_fallback(src, answer)) { + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! 
+ if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer + // reference to it, it would force it to be stored in memory, preventing the compiler from + // picking it apart and putting into registers. i.e. if we pass it as reference, + // it gets slow. + double d; + error_code error = slow_float_parsing(src, &d); + writer.append_double(d); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. 
+ return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; 
} +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' 
== *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. 
+ // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it does not fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. 
+ if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. 
+ // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. 
+ if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. 
+ if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } + // If the number is negative and valid, it must be a signed integer. + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). 
+ static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. 
+ // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for fallback */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for fallback: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // 
SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} 
+template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for fallback */ +/* end file simdjson/generic/amalgamated.h for fallback */ +/* including simdjson/fallback/end.h: #include "simdjson/fallback/end.h" */ +/* begin file simdjson/fallback/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* undefining SIMDJSON_IMPLEMENTATION from "fallback" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/fallback/end.h */ + +#endif // SIMDJSON_FALLBACK_H +/* end file simdjson/fallback.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(haswell) +/* including simdjson/haswell.h: #include "simdjson/haswell.h" */ +/* begin file simdjson/haswell.h */ +#ifndef SIMDJSON_HASWELL_H +#define SIMDJSON_HASWELL_H + +/* including simdjson/haswell/begin.h: #include "simdjson/haswell/begin.h" */ +/* begin file simdjson/haswell/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "haswell" */ +#define SIMDJSON_IMPLEMENTATION haswell + +/* including simdjson/haswell/base.h: #include "simdjson/haswell/base.h" */ +/* begin file simdjson/haswell/base.h */ +#ifndef SIMDJSON_HASWELL_BASE_H +#define SIMDJSON_HASWELL_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation 
skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL +namespace simdjson { +/** + * Implementation for Haswell (Intel AVX2). + */ +namespace haswell { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BASE_H +/* end file simdjson/haswell/base.h */ +/* including simdjson/haswell/intrinsics.h: #include "simdjson/haswell/intrinsics.h" */ +/* begin file simdjson/haswell/intrinsics.h */ +#ifndef SIMDJSON_HASWELL_INTRINSICS_H +#define SIMDJSON_HASWELL_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdjson, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. 
These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. + */ +#include // for _blsr_u64 +#include // for __lzcnt64 +#include // for most things (AVX2, AVX512, _popcnt64) +#include +#include +#include +#include +#include // for _mm_clmulepi64_si128 +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. +#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO + +static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for haswell kernel."); + +#endif // SIMDJSON_HASWELL_INTRINSICS_H +/* end file simdjson/haswell/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL +SIMDJSON_TARGET_REGION("avx2,bmi,pclmul,lzcnt,popcnt") +#endif + +/* including simdjson/haswell/bitmanipulation.h: #include "simdjson/haswell/bitmanipulation.h" */ +/* begin file simdjson/haswell/bitmanipulation.h */ +#ifndef SIMDJSON_HASWELL_BITMANIPULATION_H +#define SIMDJSON_HASWELL_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmask.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. 
+// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? + //////// + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return _blsr_u64(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return int(_lzcnt_u64(input_num)); +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BITMANIPULATION_H +/* end file simdjson/haswell/bitmanipulation.h */ +/* including simdjson/haswell/bitmask.h: #include "simdjson/haswell/bitmask.h" */ +/* begin file simdjson/haswell/bitmask.h */ +#ifndef SIMDJSON_HASWELL_BITMASK_H +#define SIMDJSON_HASWELL_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): 
#include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BITMASK_H +/* end file simdjson/haswell/bitmask.h */ +/* including simdjson/haswell/numberparsing_defs.h: #include "simdjson/haswell/numberparsing_defs.h" */ +/* begin file simdjson/haswell/numberparsing_defs.h */ +#ifndef SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H +#define SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace numberparsing { + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. 
+ const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emulate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace haswell +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H +/* end file simdjson/haswell/numberparsing_defs.h */ +/* including simdjson/haswell/simd.h: #include "simdjson/haswell/simd.h" */ +/* begin file simdjson/haswell/simd.h */ +#ifndef SIMDJSON_HASWELL_SIMD_H +#define SIMDJSON_HASWELL_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ 
+/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m256i value; + + // Zero constructor + simdjson_inline base() : value{__m256i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m256i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m256i&() const { return this->value; } + simdjson_inline operator __m256i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm256_or_si256(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. 
+ template + struct simd8; + + template> + struct base8: base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m256i _value) : base>(_value) {} + + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm256_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm256_alignr_epi8(*this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m256i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm256_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm256_testz_si256(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm256_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm256_setzero_si256(); } + static simdjson_inline simd8 load(const T values[32]) { + return _mm256_loadu_si256(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline 
base8_numeric(const __m256i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[32]) const { return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm256_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm256_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm256_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint32_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in four steps, first 8 bytes and then second 8 bytes... 
+ uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits + uint8_t mask3 = uint8_t(mask >> 16); // ... + uint8_t mask4 = uint8_t(mask >> 24); // ... + // next line loads the four 64-bit values thintable_epi8[mask1] through + // thintable_epi8[mask4] into one 256-bit register, with the entry for the + // lowest mask byte in the lowest 64-bit lane. + __m256i shufmask = _mm256_set_epi64x(thintable_epi8[mask4], thintable_epi8[mask3], + thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask and so forth + shufmask = + _mm256_add_epi8(shufmask, _mm256_set_epi32(0x18181818, 0x18181818, + 0x10101010, 0x10101010, 0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m256i pruned = _mm256_shuffle_epi8(*this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + int pop3 = BitsSetTable256mul2[mask3]; + + // then load the corresponding mask + // could be done with _mm256_loadu2_m128i but many standard libraries omit this intrinsic. + __m256i v256 = _mm256_castsi128_si256( + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8))); + __m256i compactmask = _mm256_insertf128_si256(v256, + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop3 * 8)), 1); + __m256i almostthere = _mm256_shuffle_epi8(pruned, compactmask); + // We just need to write out the result. + // This is the tricky bit that is hard to do + // if we want to return a SIMD register, since there + // is no single-instruction approach to recombine + // the two 128-bit lanes with an offset. 
+ __m128i v128; + v128 = _mm256_castsi256_si128(almostthere); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output), v128); + v128 = _mm256_extractf128_si256(almostthere, 1); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output + 16 - count_ones(mask & 0xFFFF)), v128); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 + ) : simd8(_mm256_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, 
int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm256_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm256_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 + ) : simd8(_mm256_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t 
v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm256_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm256_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epu8(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); } + simdjson_inline 
bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm256_testz_si256(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 2, "Haswell kernel should use two registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint32_t mask1 = uint32_t(mask); + uint32_t mask2 = uint32_t(mask >> 32); + this->chunks[0].compress(mask1, output); + this->chunks[1].compress(mask2, output + 32 - count_ones(mask1)); + return 64 - count_ones(mask); + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r_hi = this->chunks[1].to_bitmask(); + return r_lo | (r_hi << 32); + } + + simdjson_inline simd8 
reduce_or() const { + return this->chunks[0] | this->chunks[1]; + } + + simdjson_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] | mask, + this->chunks[1] | mask + ); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_SIMD_H +/* end file simdjson/haswell/simd.h */ +/* including simdjson/haswell/stringparsing_defs.h: #include "simdjson/haswell/stringparsing_defs.h" */ +/* begin file simdjson/haswell/stringparsing_defs.h */ +#ifndef SIMDJSON_HASWELL_STRINGPARSING_DEFS_H +#define SIMDJSON_HASWELL_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes (BYTES_PROCESSED - 1) beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + // store to dest unconditionally - we can overwrite the bits we don't like later + v.store(dst); + return { + static_cast((v == '\\').to_bitmask()), // bs_bits + static_cast((v == '"').to_bitmask()), // quote_bits + }; +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_STRINGPARSING_DEFS_H +/* end file simdjson/haswell/stringparsing_defs.h */ +/* end file simdjson/haswell/begin.h */ +/* including simdjson/generic/amalgamated.h for haswell: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for haswell */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! 
+#endif + +/* including simdjson/generic/base.h for haswell: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for haswell */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */ +/* amalgamation skipped (editor-only): #elif 
SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word +}; + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for haswell */ +/* including simdjson/generic/jsoncharutils.h for haswell: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for haswell */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero 
otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... 
+ //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for haswell */ +/* including simdjson/generic/atomparsing.h for haswell: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for haswell */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace haswell { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. 
+simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for haswell */ +/* including simdjson/generic/dom_parser_implementation.h for haswell: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file 
simdjson/generic/dom_parser_implementation.h for haswell */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { + +// expectation: sizeof(open_container) = 64/8. +struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) 
noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace haswell +} // namespace simdjson + +namespace simdjson { +namespace haswell { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace 
haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for haswell */ +/* including simdjson/generic/implementation_simdjson_result_base.h for haswell: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for haswell */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. 
+ */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. 
This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for haswell */ +/* including simdjson/generic/numberparsing.h for haswell: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for haswell */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace haswell { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) 
(found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." 
+#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. 
Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. 
+ const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. 
Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without + // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product + // is sufficiently accurate, and more computation is not needed. + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? 
-0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. 
+ // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. 
+ + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! 
+ // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! 
+ if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? 
-exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { + if (parse_float_fallback(src, answer)) { + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! 
+ if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer + // reference to it, it would force it to be stored in memory, preventing the compiler from + // picking it apart and putting into registers. i.e. if we pass it as reference, + // it gets slow. + double d; + error_code error = slow_float_parsing(src, &d); + writer.append_double(d); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. 
+ return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; 
} +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' 
== *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. 
+ // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it does not fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. 
+ if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. 
+ // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. 
+ if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. 
+ if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } + // If the number is negative and valid, it must be a signed integer. + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). 
+ static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows. + // The scan stops at p (one past the last digit) so that we never read at src_end or beyond, + // and we count the significant digits that remain (p - start_digits): those are the digits + // accumulated into i, exactly as in the unbounded parse_double variants. + const uint8_t *start_digits = src + 2; + while ((start_digits != p) && (*start_digits == '0')) { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. 
+ // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. 
+ // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for haswell */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for haswell: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // 
SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} 
+template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for haswell */ +/* end file simdjson/generic/amalgamated.h for haswell */ +/* including simdjson/haswell/end.h: #include "simdjson/haswell/end.h" */ +/* begin file simdjson/haswell/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL +SIMDJSON_UNTARGET_REGION +#endif + +/* undefining SIMDJSON_IMPLEMENTATION from "haswell" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/haswell/end.h */ + +#endif // SIMDJSON_HASWELL_H +/* end file simdjson/haswell.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(icelake) +/* including simdjson/icelake.h: #include "simdjson/icelake.h" */ +/* begin file simdjson/icelake.h */ +#ifndef SIMDJSON_ICELAKE_H +#define SIMDJSON_ICELAKE_H + +/* including simdjson/icelake/begin.h: #include "simdjson/icelake/begin.h" */ +/* begin file simdjson/icelake/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "icelake" */ +#define SIMDJSON_IMPLEMENTATION icelake +/* including simdjson/icelake/base.h: #include "simdjson/icelake/base.h" */ +/* begin file simdjson/icelake/base.h */ +#ifndef SIMDJSON_ICELAKE_BASE_H +#define SIMDJSON_ICELAKE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped 
(editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE +namespace simdjson { +/** + * Implementation for Icelake (Intel AVX512). + */ +namespace icelake { + +class implementation; + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BASE_H +/* end file simdjson/icelake/base.h */ +/* including simdjson/icelake/intrinsics.h: #include "simdjson/icelake/intrinsics.h" */ +/* begin file simdjson/icelake/intrinsics.h */ +#ifndef SIMDJSON_ICELAKE_INTRINSICS_H +#define SIMDJSON_ICELAKE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdjson, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. 
These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. + */ +#include // for _blsr_u64 +#include // for __lzcnt64 +#include // for most things (AVX2, AVX512, _popcnt64) +#include +#include +#include +#include +#include // for _mm_clmulepi64_si128 +// Important: we need the AVX-512 headers: +#include +#include +#include +#include +#include +#include +#include +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. +#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO + +static_assert(sizeof(__m512i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for icelake"); + +#endif // SIMDJSON_ICELAKE_INTRINSICS_H +/* end file simdjson/icelake/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +SIMDJSON_TARGET_REGION("avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2,avx512vl,avx2,bmi,pclmul,lzcnt,popcnt") +#endif + +/* including simdjson/icelake/bitmanipulation.h: #include "simdjson/icelake/bitmanipulation.h" */ +/* begin file simdjson/icelake/bitmanipulation.h */ +#ifndef SIMDJSON_ICELAKE_BITMANIPULATION_H +#define SIMDJSON_ICELAKE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. 
+// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? + //////// + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return _blsr_u64(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return int(_lzcnt_u64(input_num)); +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BITMANIPULATION_H +/* end file simdjson/icelake/bitmanipulation.h */ +/* including simdjson/icelake/bitmask.h: #include "simdjson/icelake/bitmask.h" */ +/* begin file simdjson/icelake/bitmask.h */ +#ifndef SIMDJSON_ICELAKE_BITMASK_H +#define SIMDJSON_ICELAKE_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include 
"simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BITMASK_H +/* end file simdjson/icelake/bitmask.h */ +/* including simdjson/icelake/simd.h: #include "simdjson/icelake/simd.h" */ +/* begin file simdjson/icelake/simd.h */ +#ifndef SIMDJSON_ICELAKE_SIMD_H +#define SIMDJSON_ICELAKE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if defined(__GNUC__) && !defined(__clang__) +#if __GNUC__ == 8 +#define SIMDJSON_GCC8 1 +#endif // __GNUC__ == 8 +#endif // defined(__GNUC__) && !defined(__clang__) + +#if SIMDJSON_GCC8 +/** + * GCC 8 fails to provide _mm512_set_epi8. We roll our own. 
+ */ +inline __m512i _mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) { + return _mm512_set_epi64(uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) + (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) + (uint64_t(a1) << 48) + (uint64_t(a0) << 56), + uint64_t(a15) + (uint64_t(a14) << 8) + (uint64_t(a13) << 16) + (uint64_t(a12) << 24) + (uint64_t(a11) << 32) + (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56), + uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) + (uint64_t(a20) << 24) + (uint64_t(a19) << 32) + (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56), + uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) + (uint64_t(a28) << 24) + (uint64_t(a27) << 32) + (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56), + uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) + (uint64_t(a36) << 24) + (uint64_t(a35) << 32) + (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56), + uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) + (uint64_t(a44) << 24) + (uint64_t(a43) << 32) + (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56), + uint64_t(a55) + 
(uint64_t(a54) << 8) + (uint64_t(a53) << 16) + (uint64_t(a52) << 24) + (uint64_t(a51) << 32) + (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56), + uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) + (uint64_t(a60) << 24) + (uint64_t(a59) << 32) + (uint64_t(a58) << 40) + (uint64_t(a57) << 48) + (uint64_t(a56) << 56)); +} +#endif // SIMDJSON_GCC8 + + + +namespace simdjson { +namespace icelake { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m512i value; + + // Zero constructor + simdjson_inline base() : value{__m512i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m512i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m512i&() const { return this->value; } + simdjson_inline operator __m512i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm512_or_si512(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm512_and_si512(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm512_xor_si512(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm512_andnot_si512(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. 
+ template + struct simd8; + + template> + struct base8: base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m512i _value) : base>(_value) {} + + friend simdjson_really_inline uint64_t operator==(const simd8 lhs, const simd8 rhs) { + return _mm512_cmpeq_epi8_mask(lhs, rhs); + } + + static const int SIZE = sizeof(base::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + // workaround for compilers unable to figure out that 16 - N is a constant (GCC 8) + constexpr int shift = 16 - N; + return _mm512_alignr_epi8(*this, _mm512_permutex2var_epi64(prev_chunk, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), *this), shift); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm512_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m512i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + simdjson_inline bool any() const { return !!_mm512_test_epi8_mask (*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm512_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm512_setzero_si512(); } + static simdjson_inline simd8 load(const T values[64]) { + return _mm512_loadu_si512(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, 
v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m512i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[64]) const { return _mm512_storeu_si512(reinterpret_cast<__m512i *>(dst), *this); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm512_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm512_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm512_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. 
+ template + simdjson_inline void compress(uint64_t mask, L * output) const { + _mm512_mask_compressstoreu_epi8 (output,~mask,*this); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[64]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31, + int8_t v32, int8_t v33, int8_t v34, int8_t v35, int8_t v36, int8_t v37, int8_t v38, int8_t v39, + int8_t v40, int8_t v41, int8_t v42, int8_t v43, int8_t v44, int8_t v45, int8_t v46, int8_t v47, + int8_t v48, int8_t v49, int8_t v50, int8_t v51, int8_t v52, int8_t v53, int8_t v54, int8_t v55, + int8_t v56, int8_t v57, int8_t v58, int8_t v59, int8_t v60, int8_t v61, int8_t v62, int8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, 
v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 + )) {} + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epi8(*this, other); } + + simdjson_inline simd8 operator>(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(*this, other),_mm512_set1_epi8(uint8_t(0x80))); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(other, *this),_mm512_set1_epi8(uint8_t(0x80))); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[64]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, 
uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31, + uint8_t v32, uint8_t v33, uint8_t v34, uint8_t v35, uint8_t v36, uint8_t v37, uint8_t v38, uint8_t v39, + uint8_t v40, uint8_t v41, uint8_t v42, uint8_t v43, uint8_t v44, uint8_t v45, uint8_t v46, uint8_t v47, + uint8_t v48, uint8_t v49, uint8_t v50, uint8_t v51, uint8_t v52, uint8_t v53, uint8_t v54, uint8_t v55, + uint8_t v56, uint8_t v57, uint8_t v58, uint8_t v59, uint8_t v60, uint8_t v61, uint8_t v62, uint8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 + )) {} + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm512_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm512_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epu8(*this, 
other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epu8(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline uint64_t operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline uint64_t operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return _mm512_mask_blend_epi8(*this == uint8_t(0), _mm512_set1_epi8(0), _mm512_set1_epi8(-1)); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + + simdjson_inline bool is_ascii() const { return _mm512_movepi8_mask(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { + return !_mm512_test_epi8_mask(*this, *this); + } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return !_mm512_test_epi8_mask(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm512_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return 
simd8(_mm512_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. value.get_bit<7>() gets the high bit + template + simdjson_inline uint64_t get_bit() const { return _mm512_movepi8_mask(_mm512_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 1, "Icelake kernel should use one register per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const simd8 chunk0) : chunks{chunk0} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(mask, output); + return 64 - count_ones(mask); + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + } + + simdjson_inline simd8 reduce_or() const { + return this->chunks[0]; + } + + simdjson_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] | mask + ); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return this->chunks[0] == mask; + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return this->chunks[0] == other.chunks[0]; + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return this->chunks[0] <= mask; + } + }; // struct simd8x64 + +} // namespace simd + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_SIMD_H +/* end file simdjson/icelake/simd.h */ +/* including 
simdjson/icelake/stringparsing_defs.h: #include "simdjson/icelake/stringparsing_defs.h" */ +/* begin file simdjson/icelake/stringparsing_defs.h */ +#ifndef SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H +#define SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 64; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint64_t bs_bits; + uint64_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 15 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + // store to dest unconditionally - we can overwrite the bits we don't like later + v.store(dst); + return { + static_cast(v == '\\'), // bs_bits + static_cast(v == '"'), // quote_bits + }; +} + +} // unnamed namespace +} // namespace icelake +} // namespace 
simdjson + +#endif // SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H +/* end file simdjson/icelake/stringparsing_defs.h */ +/* including simdjson/icelake/numberparsing_defs.h: #include "simdjson/icelake/numberparsing_defs.h" */ +/* begin file simdjson/icelake/numberparsing_defs.h */ +#ifndef SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H +#define SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace numberparsing { + +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. 
+ const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace icelake +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H +/* end file simdjson/icelake/numberparsing_defs.h */ +/* end file simdjson/icelake/begin.h */ +/* including simdjson/generic/amalgamated.h for icelake: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for icelake */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be 
included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for icelake: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for icelake */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */ +/* amalgamation 
skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word +}; + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for icelake */ +/* including simdjson/generic/jsoncharutils.h for icelake: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for icelake */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or 
whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... 
+ //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for icelake */ +/* including simdjson/generic/atomparsing.h for icelake: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for icelake */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace icelake { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. 
+simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for icelake */ +/* including simdjson/generic/dom_parser_implementation.h for icelake: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file 
simdjson/generic/dom_parser_implementation.h for icelake */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { + +// expectation: sizeof(open_container) = 64/8. +struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) 
noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace icelake +} // namespace simdjson + +namespace simdjson { +namespace icelake { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace 
icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for icelake */ +/* including simdjson/generic/implementation_simdjson_result_base.h for icelake: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for icelake */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. 
+ */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. 
This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for icelake */ +/* including simdjson/generic/numberparsing.h for icelake: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for icelake */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace icelake { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) 
(found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." 
+#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. 
Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. 
+ const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. 
Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without + // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product + // is sufficiently accurate, and more computation is not needed. + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? 
-0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. 
+ // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. 
+ + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! 
+ // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! 
+ if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? 
-exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { + if (parse_float_fallback(src, answer)) { + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! 
+ if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer + // reference to it, it would force it to be stored in memory, preventing the compiler from + // picking it apart and putting into registers. i.e. if we pass it as reference, + // it gets slow. + double d; + error_code error = slow_float_parsing(src, &d); + writer.append_double(d); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. 
+ return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; 
} +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' 
== *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. 
+ // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it does not fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. 
+ if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. 
+ // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. 
+ if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. 
+ if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } + // If the number is negative and valid, it must be a signed integer. + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). 
+ static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; +} + +// Parse a double from the bytes in [src, src_end). Never read at src_end or beyond. Returns NUMBER_ERROR for malformed input (empty range, a leading zero followed by more digits, missing fraction or exponent digits, trailing non-structural character) and INCORRECT_TYPE when there is no integer digit. +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if the remaining significant digits still overflow; the scan stops at p (the end of the digits) so that src_end is never read + const uint8_t *start_digits = src + 2; + while ((start_digits != p) && (*start_digits == '0')) { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. 
+ // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for icelake */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for icelake: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // 
SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} 
+template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for icelake */ +/* end file simdjson/generic/amalgamated.h for icelake */ +/* including simdjson/icelake/end.h: #include "simdjson/icelake/end.h" */ +/* begin file simdjson/icelake/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +SIMDJSON_UNTARGET_REGION +#endif + +/* undefining SIMDJSON_IMPLEMENTATION from "icelake" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/icelake/end.h */ + +#endif // SIMDJSON_ICELAKE_H +/* end file simdjson/icelake.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(ppc64) +/* including simdjson/ppc64.h: #include "simdjson/ppc64.h" */ +/* begin file simdjson/ppc64.h */ +#ifndef SIMDJSON_PPC64_H +#define SIMDJSON_PPC64_H + +/* including simdjson/ppc64/begin.h: #include "simdjson/ppc64/begin.h" */ +/* begin file simdjson/ppc64/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "ppc64" */ +#define SIMDJSON_IMPLEMENTATION ppc64 +/* including simdjson/ppc64/base.h: #include "simdjson/ppc64/base.h" */ +/* begin file simdjson/ppc64/base.h */ +#ifndef SIMDJSON_PPC64_BASE_H +#define SIMDJSON_PPC64_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include 
"simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Implementation for ALTIVEC (PPC64). + */ +namespace ppc64 { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_BASE_H +/* end file simdjson/ppc64/base.h */ +/* including simdjson/ppc64/intrinsics.h: #include "simdjson/ppc64/intrinsics.h" */ +/* begin file simdjson/ppc64/intrinsics.h */ +#ifndef SIMDJSON_PPC64_INTRINSICS_H +#define SIMDJSON_PPC64_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +// These are defined by altivec.h in GCC toolchain, it is safe to undef them. 
+#ifdef bool +#undef bool +#endif + +#ifdef vector +#undef vector +#endif + +static_assert(sizeof(__vector unsigned char) <= simdjson::SIMDJSON_PADDING, "insufficient padding for ppc64"); + +#endif // SIMDJSON_PPC64_INTRINSICS_H +/* end file simdjson/ppc64/intrinsics.h */ +/* including simdjson/ppc64/bitmanipulation.h: #include "simdjson/ppc64/bitmanipulation.h" */ +/* begin file simdjson/ppc64/bitmanipulation.h */ +#ifndef SIMDJSON_PPC64_BITMANIPULATION_H +#define SIMDJSON_PPC64_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). 
+ _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num - 1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline int count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num); // Visual Studio wants two underscores +} +#else +simdjson_inline int count_ones(uint64_t input_num) { + return __builtin_popcountll(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + *result = value1 + value2; + return *result < value1; +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_BITMANIPULATION_H +/* end file simdjson/ppc64/bitmanipulation.h */ +/* including simdjson/ppc64/bitmask.h: #include "simdjson/ppc64/bitmask.h" */ +/* begin file simdjson/ppc64/bitmask.h */ +#ifndef SIMDJSON_PPC64_BITMASK_H +#define SIMDJSON_PPC64_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation 
skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is +// encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + // You can use the version below, however gcc sometimes miscompiles + // vec_pmsum_be, it happens somewhere around between 8 and 9th version. + // The performance boost was not noticeable, falling back to a usual + // implementation. + // __vector unsigned long long all_ones = {~0ull, ~0ull}; + // __vector unsigned long long mask = {bitmask, 0}; + // // Clang and GCC return different values for pmsum for ull so cast it to one. + // // Generally it is not specified by ALTIVEC ISA what is returned by + // // vec_pmsum_be. + // #if defined(__LITTLE_ENDIAN__) + // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[0]); + // #else + // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[1]); + // #endif + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif +/* end file simdjson/ppc64/bitmask.h */ +/* including simdjson/ppc64/numberparsing_defs.h: #include "simdjson/ppc64/numberparsing_defs.h" */ +/* begin file simdjson/ppc64/numberparsing_defs.h */ +#ifndef SIMDJSON_PPC64_NUMBERPARSING_DEFS_H +#define SIMDJSON_PPC64_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation 
skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +#if defined(__linux__) +#include +#elif defined(__FreeBSD__) +#include +#endif + +namespace simdjson { +namespace ppc64 { +namespace numberparsing { + +// we don't have appropriate instructions, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); +#ifdef __BIG_ENDIAN__ +#if defined(__linux__) + val = bswap_64(val); +#elif defined(__FreeBSD__) + val = bswap64(val); +#endif +#endif + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emulate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace ppc64 +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_PPC64_NUMBERPARSING_DEFS_H +/* end file simdjson/ppc64/numberparsing_defs.h */ +/* including simdjson/ppc64/simd.h: #include "simdjson/ppc64/simd.h" */ +/* begin file simdjson/ppc64/simd.h */ +#ifndef SIMDJSON_PPC64_SIMD_H +#define SIMDJSON_PPC64_SIMD_H + +/* amalgamation skipped
(editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace ppc64 { +namespace { +namespace simd { + +using __m128i = __vector unsigned char; + +template struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i &() const { + return this->value; + } + simdjson_inline operator __m128i &() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { + return vec_or(this->value, (__m128i)other); + } + simdjson_inline Child operator&(const Child other) const { + return vec_and(this->value, (__m128i)other); + } + simdjson_inline Child operator^(const Child other) const { + return vec_xor(this->value, (__m128i)other); + } + simdjson_inline Child bit_andnot(const Child other) const { + return vec_andc(this->value, (__m128i)other); + } + simdjson_inline Child &operator|=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast | other; + return *this_cast; + } + simdjson_inline Child &operator&=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast & other; + return *this_cast; + } + simdjson_inline Child &operator^=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast ^ other; + return *this_cast; + } +}; + +template > +struct base8 : base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + 
simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { + return (__m128i)vec_cmpeq(lhs.value, (__m128i)rhs); + } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(simd8 prev_chunk) const { + __m128i chunk = this->value; +#ifdef __LITTLE_ENDIAN__ + chunk = (__m128i)vec_reve(this->value); + prev_chunk = (__m128i)vec_reve((__m128i)prev_chunk); +#endif + chunk = (__m128i)vec_sld((__m128i)prev_chunk, (__m128i)chunk, 16 - N); +#ifdef __LITTLE_ENDIAN__ + chunk = (__m128i)vec_reve((__m128i)chunk); +#endif + return chunk; + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd8 : base8 { + static simdjson_inline simd8 splat(bool _value) { + return (__m128i)vec_splats((unsigned char)(-(!!_value))); + } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) + : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) + : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { + __vector unsigned long long result; + const __m128i perm_mask = {0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40, + 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00}; + + result = ((__vector unsigned long long)vec_vbpermq((__m128i)this->value, + (__m128i)perm_mask)); +#ifdef __LITTLE_ENDIAN__ + return static_cast(result[1]); +#else + return static_cast(result[0]); +#endif + } + simdjson_inline bool any() const { + return !vec_all_eq(this->value, (__m128i)vec_splats(0)); + } + simdjson_inline simd8 operator~() const { + return this->value ^ (__m128i)splat(true); + } +}; + +template struct base8_numeric : base8 { + static simdjson_inline simd8 splat(T value) { + (void)value; + return (__m128i)vec_splats(value); + } + static simdjson_inline simd8 zero() { return splat(0); } + static simdjson_inline simd8 load(const T values[16]) { + return (__m128i)(vec_vsx_ld(0, 
reinterpret_cast(values))); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, + T v5, T v6, T v7, T v8, T v9, + T v10, T v11, T v12, T v13, + T v14, T v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) + : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { + vec_vsx_st(this->value, 0, reinterpret_cast<__m128i *>(dst)); + } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { + return (__m128i)((__m128i)this->value + (__m128i)other); + } + simdjson_inline simd8 operator-(const simd8 other) const { + return (__m128i)((__m128i)this->value - (__m128i)other); + } + simdjson_inline simd8 &operator+=(const simd8 other) { + *this = *this + other; + return *static_cast *>(this); + } + simdjson_inline simd8 &operator-=(const simd8 other) { + *this = *this - other; + return *static_cast *>(this); + } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior + // for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return (__m128i)vec_perm((__m128i)lookup_table, (__m128i)lookup_table, this->value); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted + // as a bitset). Passing a 0 value for mask would be equivalent to writing out + // every byte to output. Only the first 16 - count_ones(mask) bytes of the + // result are significant but 16 bytes get written. 
Design consideration: it + // seems like a function with the signature simd8 compress(uint32_t mask) + // would be sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L *output) const { + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + using internal::thintable_epi8; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. +#ifdef __LITTLE_ENDIAN__ + __m128i shufmask = (__m128i)(__vector unsigned long long){ + thintable_epi8[mask1], thintable_epi8[mask2]}; +#else + __m128i shufmask = (__m128i)(__vector unsigned long long){ + thintable_epi8[mask2], thintable_epi8[mask1]}; + shufmask = (__m128i)vec_reve((__m128i)shufmask); +#endif + // we increment by 0x08 the second half of the mask + shufmask = ((__m128i)shufmask) + + ((__m128i)(__vector int){0, 0, 0x08080808, 0x08080808}); + + // this is the version "nearly pruned" + __m128i pruned = vec_perm(this->value, this->value, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. 
+ __m128i compactmask = + vec_vsx_ld(0, reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = vec_perm(pruned, (__m128i)vec_splats(0), compactmask); + vec_vsx_st(answer, 0, reinterpret_cast<__m128i *>(output)); + } + + template + simdjson_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); + } +}; + +// Signed bytes +template <> struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, + int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) + : simd8((__m128i)(__vector signed char){v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, + v15}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 + repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, + int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Order-sensitive comparisons + simdjson_inline simd8 + max_val(const simd8 other) const { + return (__m128i)vec_max((__vector signed 
char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + min_val(const simd8 other) const { + return (__m128i)vec_min((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + operator>(const simd8 other) const { + return (__m128i)vec_cmpgt((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + operator<(const simd8 other) const { + return (__m128i)vec_cmplt((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } +}; + +// Unsigned bytes +template <> struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) + : simd8((__m128i){v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 + repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, + uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, + uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, + uint8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Saturated math + simdjson_inline simd8 + saturating_add(const simd8 other) const { + return (__m128i)vec_adds(this->value, (__m128i)other); + } + simdjson_inline simd8 + saturating_sub(const simd8 other) const { + return (__m128i)vec_subs(this->value, 
(__m128i)other); + } + + // Order-specific operations + simdjson_inline simd8 + max_val(const simd8 other) const { + return (__m128i)vec_max(this->value, (__m128i)other); + } + simdjson_inline simd8 + min_val(const simd8 other) const { + return (__m128i)vec_min(this->value, (__m128i)other); + } + // Same as >, but only guarantees true is nonzero (> guarantees true = -1) + simdjson_inline simd8 + gt_bits(const simd8 other) const { + return this->saturating_sub(other); + } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 + lt_bits(const simd8 other) const { + return other.saturating_sub(*this); + } + simdjson_inline simd8 + operator<=(const simd8 other) const { + return other.max_val(*this) == other; + } + simdjson_inline simd8 + operator>=(const simd8 other) const { + return other.min_val(*this) == other; + } + simdjson_inline simd8 + operator>(const simd8 other) const { + return this->gt_bits(other).any_bits_set(); + } + simdjson_inline simd8 + operator<(const simd8 other) const { + return this->lt_bits(other).any_bits_set(); + } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { + return (__m128i)vec_cmpeq(this->value, (__m128i)vec_splats(uint8_t(0))); + } + simdjson_inline simd8 bits_not_set(simd8 bits) const { + return (*this & bits).bits_not_set(); + } + simdjson_inline simd8 any_bits_set() const { + return ~this->bits_not_set(); + } + simdjson_inline simd8 any_bits_set(simd8 bits) const { + return ~this->bits_not_set(bits); + } + simdjson_inline bool bits_not_set_anywhere() const { + return vec_all_eq(this->value, (__m128i)vec_splats(0)); + } + simdjson_inline bool any_bits_set_anywhere() const { + return !bits_not_set_anywhere(); + } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { + return vec_all_eq(vec_and(this->value, (__m128i)bits), + (__m128i)vec_splats(0)); + } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { + return
!bits_not_set_anywhere(bits); + } + template simdjson_inline simd8 shr() const { + return simd8( + (__m128i)vec_sr(this->value, (__m128i)vec_splat_u8(N))); + } + template simdjson_inline simd8 shl() const { + return simd8( + (__m128i)vec_sl(this->value, (__m128i)vec_splat_u8(N))); + } +}; + +template struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, + "PPC64 kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64 &o) = delete; // no copy allowed + simd8x64 & + operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, + const simd8 chunk2, const simd8 chunk3) + : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) + : chunks{simd8::load(ptr), simd8::load(ptr + 16), + simd8::load(ptr + 32), simd8::load(ptr + 48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr + sizeof(simd8) * 0); + this->chunks[1].store(ptr + sizeof(simd8) * 1); + this->chunks[2].store(ptr + sizeof(simd8) * 2); + this->chunks[3].store(ptr + sizeof(simd8) * 3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T *output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), + output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), + output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), + output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r1 = 
this->chunks[1].to_bitmask(); + uint64_t r2 = this->chunks[2].to_bitmask(); + uint64_t r3 = this->chunks[3].to_bitmask(); + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, + this->chunks[2] == mask, this->chunks[3] == mask) + .to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64(this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3]) + .to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, + this->chunks[2] <= mask, this->chunks[3] <= mask) + .to_bitmask(); + } +}; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_SIMD_H +/* end file simdjson/ppc64/simd.h */ +/* including simdjson/ppc64/stringparsing_defs.h: #include "simdjson/ppc64/stringparsing_defs.h" */ +/* begin file simdjson/ppc64/stringparsing_defs.h */ +#ifndef SIMDJSON_PPC64_STRINGPARSING_DEFS_H +#define SIMDJSON_PPC64_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/simd.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations.
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote + copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { + return ((bs_bits - 1) & quote_bits) != 0; + } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { + return trailing_zeroes(quote_bits); + } + simdjson_inline int backslash_index() { + return trailing_zeroes(bs_bits); + } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote +backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), + "backslash and quote finder must process fewer than " + "SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on + // PPC; therefore, we smash them together into a 64-byte mask and get the + // bitmask from there. 
+ uint64_t bs_and_quote = + simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_STRINGPARSING_DEFS_H +/* end file simdjson/ppc64/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/ppc64/begin.h */ +/* including simdjson/generic/amalgamated.h for ppc64: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for ppc64 */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for ppc64: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. 
*/ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." 
*/ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word +}; + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for ppc64 */ +/* including simdjson/generic/jsoncharutils.h for ppc64: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// 
otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. 
+ return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for ppc64 */ +/* including simdjson/generic/atomparsing.h for ppc64: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace ppc64 { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. 
+// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len 
> 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for ppc64 */ +/* including simdjson/generic/dom_parser_implementation.h for ppc64: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { + +// expectation: sizeof(open_container) = 64/8. 
+struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace ppc64 +} // namespace 
simdjson + +namespace simdjson { +namespace ppc64 { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for ppc64 */ +/* including simdjson/generic/implementation_simdjson_result_base.h for ppc64: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* 
amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). 
+ */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. 
+ */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for ppc64 */ +/* including simdjson/generic/numberparsing.h for ppc64: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace ppc64 { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define 
WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. 
+ // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. 
+ // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. 
+ simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. 
+ // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without + // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product + // is sufficiently accurate, and more computation is not needed. + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. 
+ // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. 
+ if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // 
might overflow, we will handle the overflow later + return true; +} + +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. 
+ + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? 
-exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { + if (parse_float_fallback(src, answer)) { + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! 
+ if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer + // reference to it, it would force it to be stored in memory, preventing the compiler from + // picking it apart and putting into registers. i.e. if we pass it as reference, + // it gets slow. + double d; + error_code error = slow_float_parsing(src, &d); + writer.append_double(d); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. 
+ return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; 
} +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' 
== *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. 
+ // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it does not fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. 
+ if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. 
+ // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. 
+ if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. 
+ if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } + // If the number is negative and valid, it must be a signed integer. + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). 
+ static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 (never reading at or past p) and see if the digits that remain still overflow + const uint8_t *start_digits = src + 2; + while ((start_digits != p) && (*start_digits == '0')) { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. 
+ // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for ppc64 */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for ppc64: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // 
SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} 
+template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for ppc64 */ +/* end file simdjson/generic/amalgamated.h for ppc64 */ +/* including simdjson/ppc64/end.h: #include "simdjson/ppc64/end.h" */ +/* begin file simdjson/ppc64/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "ppc64" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/ppc64/end.h */ + +#endif // SIMDJSON_PPC64_H +/* end file simdjson/ppc64.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(westmere) +/* including simdjson/westmere.h: #include "simdjson/westmere.h" */ +/* begin file simdjson/westmere.h */ +#ifndef SIMDJSON_WESTMERE_H +#define SIMDJSON_WESTMERE_H + +/* including simdjson/westmere/begin.h: #include "simdjson/westmere/begin.h" */ +/* begin file simdjson/westmere/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "westmere" */ +#define SIMDJSON_IMPLEMENTATION westmere +/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ +/* begin file simdjson/westmere/base.h */ +#ifndef SIMDJSON_WESTMERE_BASE_H +#define SIMDJSON_WESTMERE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" 
*/ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +/** + * Implementation for Westmere (Intel SSE4.2). + */ +namespace westmere { + +class implementation; + +namespace { +namespace simd { + +template struct simd8; +template struct simd8x64; + +} // namespace simd +} // unnamed namespace + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BASE_H +/* end file simdjson/westmere/base.h */ +/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ +/* begin file simdjson/westmere/intrinsics.h */ +#ifndef SIMDJSON_WESTMERE_INTRINSICS_H +#define SIMDJSON_WESTMERE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. 
However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + */ +#include // for _mm_alignr_epi8 +#include // for _mm_clmulepi64_si128 +#endif + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); + +#endif // SIMDJSON_WESTMERE_INTRINSICS_H +/* end file simdjson/westmere/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +SIMDJSON_TARGET_REGION("sse4.2,pclmul,popcnt") +#endif + +/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ +/* begin file simdjson/westmere/bitmanipulation.h */ +#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H +#define SIMDJSON_WESTMERE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). 
+ _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H +/* end file simdjson/westmere/bitmanipulation.h */ +/* including simdjson/westmere/bitmask.h: #include "simdjson/westmere/bitmask.h" */ +/* begin file simdjson/westmere/bitmask.h */ +#ifndef SIMDJSON_WESTMERE_BITMASK_H +#define SIMDJSON_WESTMERE_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): 
#include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // Carry-less multiply by all-ones: bit i of the low 64 bits is the XOR of bitmask bits 0..i. + // NOTE(review): assumes PCLMUL is available here (upstream note: no processor has avx2 but not clmul). + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMASK_H +/* end file simdjson/westmere/bitmask.h */ +/* including simdjson/westmere/numberparsing_defs.h: #include "simdjson/westmere/numberparsing_defs.h" */ +/* begin file simdjson/westmere/numberparsing_defs.h */ +#ifndef SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H +#define SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H + +/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ +/* begin file simdjson/westmere/base.h */ +#ifndef SIMDJSON_WESTMERE_BASE_H +#define SIMDJSON_WESTMERE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +/** + * Implementation for Westmere (Intel SSE4.2). 
+ */ +namespace westmere { + +class implementation; + +namespace { +namespace simd { + +template struct simd8; +template struct simd8x64; + +} // namespace simd +} // unnamed namespace + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BASE_H +/* end file simdjson/westmere/base.h */ +/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ +/* begin file simdjson/westmere/intrinsics.h */ +#ifndef SIMDJSON_WESTMERE_INTRINSICS_H +#define SIMDJSON_WESTMERE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. 
However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + */ +#include // for _mm_alignr_epi8 +#include // for _mm_clmulepi64_si128 +#endif + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); + +#endif // SIMDJSON_WESTMERE_INTRINSICS_H +/* end file simdjson/westmere/intrinsics.h */ + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace numberparsing { + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. + const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = 
__umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace westmere +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H +/* end file simdjson/westmere/numberparsing_defs.h */ +/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ +/* begin file simdjson/westmere/simd.h */ +#ifndef SIMDJSON_WESTMERE_SIMD_H +#define SIMDJSON_WESTMERE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace simd { + + template + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } + simdjson_inline Child operator^(const Child other) const { return 
_mm_xor_si128(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + template> + struct base8: base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } + static simdjson_inline simd8 load(const T values[16]) { + return 
_mm_loadu_si128(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. 
+ template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. 
+ __m128i compactmask = + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const 
simd8 other) const { return _mm_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 
other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } // lt_bits, not gt_bits: nonzero where other > *this + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. 
value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); + uint64_t r1 = this->chunks[1].to_bitmask() ; + uint64_t r2 = this->chunks[2].to_bitmask() ; + uint64_t r3 = this->chunks[3].to_bitmask() ; + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline 
uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H +/* end file simdjson/westmere/simd.h */ +/* including simdjson/westmere/stringparsing_defs.h: #include "simdjson/westmere/stringparsing_defs.h" */ +/* begin file simdjson/westmere/stringparsing_defs.h */ +#ifndef SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H +#define SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H + +/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ +/* begin file simdjson/westmere/bitmanipulation.h */ +#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H +#define SIMDJSON_WESTMERE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. 
+SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). 
+ if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H +/* end file simdjson/westmere/bitmanipulation.h */ +/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ +/* begin file simdjson/westmere/simd.h */ +#ifndef SIMDJSON_WESTMERE_SIMD_H +#define SIMDJSON_WESTMERE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace simd { + + template + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator 
const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + template> + struct base8: base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + simdjson_inline bool any() const { return 
!_mm_testz_si128(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } + static simdjson_inline simd8 load(const T values[16]) { + return _mm_loadu_si128(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). 
+ // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. 
+ __m128i compactmask = + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const 
simd8 other) const { return _mm_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 
other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. 
value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); + uint64_t r1 = this->chunks[1].to_bitmask() ; + uint64_t r2 = this->chunks[2].to_bitmask() ; + uint64_t r3 = this->chunks[3].to_bitmask() ; + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline 
uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H +/* end file simdjson/westmere/simd.h */ + +namespace simdjson { +namespace westmere { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + 16); + v0.store(dst); + v1.store(dst + 16); + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H +/* end file simdjson/westmere/stringparsing_defs.h */ +/* end file simdjson/westmere/begin.h */ +/* including simdjson/generic/amalgamated.h for westmere: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for westmere */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! 
+#endif + +/* including simdjson/generic/base.h for westmere: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for westmere */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */ +/* amalgamation skipped (editor-only): #elif 
SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word +}; + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for westmere */ +/* including simdjson/generic/jsoncharutils.h for westmere: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for westmere */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero 
otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... 
+ //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for westmere */ +/* including simdjson/generic/atomparsing.h for westmere: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for westmere */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace westmere { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. 
+simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for westmere */ +/* including simdjson/generic/dom_parser_implementation.h for westmere: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file 
simdjson/generic/dom_parser_implementation.h for westmere */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { + +// expectation: sizeof(open_container) = 64/8. +struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) 
noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace westmere +} // namespace simdjson + +namespace simdjson { +namespace westmere { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // 
namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for westmere */ +/* including simdjson/generic/implementation_simdjson_result_base.h for westmere: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for westmere */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. 
+ */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. 
This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for westmere */ +/* including simdjson/generic/numberparsing.h for westmere: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for westmere */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace westmere { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) 
(found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." 
+#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. 
Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. 
+ const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. 
Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without + // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product + // is sufficiently accurate, and more computation is not needed. + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? 
-0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. 
+ // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. 
+ + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! 
+ // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! 
+ if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? 
-exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { + if (parse_float_fallback(src, answer)) { + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! 
+ if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer + // reference to it, it would force it to be stored in memory, preventing the compiler from + // picking it apart and putting into registers. i.e. if we pass it as reference, + // it gets slow. + double d; + error_code error = slow_float_parsing(src, &d); + writer.append_double(d); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. 
+ return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; 
} +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' 
== *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. 
+ // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it does not fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. 
+ if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. 
+ // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. 
+ if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. 
+ if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } + // If the number is negative and valid, it must be a signed integer. + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). 
+ static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. 
+ // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for westmere */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for westmere: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // 
SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} 
+template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for westmere */ +/* end file simdjson/generic/amalgamated.h for westmere */ +/* including simdjson/westmere/end.h: #include "simdjson/westmere/end.h" */ +/* begin file simdjson/westmere/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +SIMDJSON_UNTARGET_REGION +#endif + +/* undefining SIMDJSON_IMPLEMENTATION from "westmere" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/westmere/end.h */ + +#endif // SIMDJSON_WESTMERE_H +/* end file simdjson/westmere.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(lsx) +/* including simdjson/lsx.h: #include "simdjson/lsx.h" */ +/* begin file simdjson/lsx.h */ +#ifndef SIMDJSON_LSX_H +#define SIMDJSON_LSX_H + +/* including simdjson/lsx/begin.h: #include "simdjson/lsx/begin.h" */ +/* begin file simdjson/lsx/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "lsx" */ +#define SIMDJSON_IMPLEMENTATION lsx +/* including simdjson/lsx/base.h: #include "simdjson/lsx/base.h" */ +/* begin file simdjson/lsx/base.h */ +#ifndef SIMDJSON_LSX_BASE_H +#define SIMDJSON_LSX_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* 
amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Implementation for LSX. + */ +namespace lsx { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_BASE_H +/* end file simdjson/lsx/base.h */ +/* including simdjson/lsx/intrinsics.h: #include "simdjson/lsx/intrinsics.h" */ +/* begin file simdjson/lsx/intrinsics.h */ +#ifndef SIMDJSON_LSX_INTRINSICS_H +#define SIMDJSON_LSX_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for LoongArch SX"); + +#endif // SIMDJSON_LSX_INTRINSICS_H +/* end file simdjson/lsx/intrinsics.h */ +/* including simdjson/lsx/bitmanipulation.h: #include "simdjson/lsx/bitmanipulation.h" */ +/* begin file simdjson/lsx/bitmanipulation.h */ +#ifndef SIMDJSON_LSX_BITMANIPULATION_H +#define SIMDJSON_LSX_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/bitmask.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. 
+// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { + return __builtin_ctzll(input_num); +} + +/* clears the lowest set bit of input_num; an input of zero yields zero (unsigned arithmetic, nothing undefined here) */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* number of leading zero bits; result is undefined when input_num is zero (__builtin_clzll) */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return __builtin_clzll(input_num); +} + +/* number of set bits in input_num: input_num is placed in lane 0 of a vector, counted per 64-bit lane, and lane 0 is read back */ +simdjson_inline int count_ones(uint64_t input_num) { + return __lsx_vpickve2gr_w(__lsx_vpcnt_d(__m128i(v2u64{input_num, 0})), 0); +} + +/* stores value1 + value2 into *result and returns true when the unsigned addition overflowed */ +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +} + +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_BITMANIPULATION_H +/* end file simdjson/lsx/bitmanipulation.h */ +/* including simdjson/lsx/bitmask.h: #include "simdjson/lsx/bitmask.h" */ +/* begin file simdjson/lsx/bitmask.h */ +#ifndef SIMDJSON_LSX_BITMASK_H +#define SIMDJSON_LSX_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered.
+// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif +/* end file simdjson/lsx/bitmask.h */ +/* including simdjson/lsx/numberparsing_defs.h: #include "simdjson/lsx/numberparsing_defs.h" */ +/* begin file simdjson/lsx/numberparsing_defs.h */ +#ifndef SIMDJSON_LSX_NUMBERPARSING_DEFS_H +#define SIMDJSON_LSX_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace lsx { +namespace numberparsing { + +// we don't have appropriate instructions, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); + return answer; +} + +} // namespace numberparsing +} // namespace lsx +} // namespace simdjson + +#define 
SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_LSX_NUMBERPARSING_DEFS_H +/* end file simdjson/lsx/numberparsing_defs.h */ +/* including simdjson/lsx/simd.h: #include "simdjson/lsx/simd.h" */ +/* begin file simdjson/lsx/simd.h */ +#ifndef SIMDJSON_LSX_SIMD_H +#define SIMDJSON_LSX_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + simdjson_inline operator const v16i8&() const { return (v16i8&)this->value; } + simdjson_inline operator v16i8&() { return (v16i8&)this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return __lsx_vor_v(*this, other); } + simdjson_inline Child operator&(const Child other) const { return __lsx_vand_v(*this, other); } + simdjson_inline Child operator^(const Child other) const { return __lsx_vxor_v(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return __lsx_vandn_v(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { 
auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. + template + struct simd8; + + template> + struct base8: base> { + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return __lsx_vseq_b(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return __lsx_vor_v(__lsx_vbsll_v(*this, N), __lsx_vbsrl_v(prev_chunk, 16 - N)); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { + return __lsx_vreplgr2vr_b(uint8_t(-(!!_value))); + } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return __lsx_vpickve2gr_w(__lsx_vmskltz_b(*this), 0); } + // Returns true when at least one byte lane of the mask is non-zero. + // The vmsknz_b mask compared against zero is the "no bits set" test that + // bits_not_set_anywhere() uses, so any() must be its negation (0 != ...). + simdjson_inline bool any() const { return 0 != __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return __lsx_vreplgr2vr_b(_value); } + static simdjson_inline simd8 zero() { return __lsx_vldi(0); } + static simdjson_inline simd8 load(const T values[16]) { + return __lsx_vld(reinterpret_cast(values), 0); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T
v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { + return __lsx_vst(*this, reinterpret_cast<__m128i *>(dst), 0); + } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return __lsx_vadd_b(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return __lsx_vsub_b(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return __lsx_vshuf_b(lookup_table, lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by haswell + // lsx do it in 2 steps, first 8 bytes and then second 8 bytes... 
+ uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register. + __m128i shufmask = {int64_t(thintable_epi8[mask1]), int64_t(thintable_epi8[mask2]) + 0x0808080808080808}; + // this is the version "nearly pruned" + __m128i pruned = __lsx_vshuf_b(*this, *this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask + __m128i compactmask = __lsx_vldx(reinterpret_cast(reinterpret_cast(pshufb_combine_table)), pop1 * 8); + __m128i answer = __lsx_vshuf_b(pruned, pruned, compactmask); + __lsx_vst(answer, reinterpret_cast(output), 0); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[16]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8({ + v0, v1, v2, 
v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return __lsx_vmax_b(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lsx_vmin_b(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return __lsx_vslt_b(other, *this); } + simdjson_inline simd8 operator<(const simd8 other) const { return __lsx_vslt_b(*this, other); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(__m128i(v16u8{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + })) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, 
v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return __lsx_vsadd_bu(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return __lsx_vssub_bu(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return __lsx_vmax_bu(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lsx_vmin_bu(other, *this); } + // Same as >, but only guarantees true is nonzero (> guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + // this <= other exactly when max(other, this) is other + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + // this >= other exactly when min(other, this) is other + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return 0 == __lsx_vpickve2gr_w(__lsx_vmskltz_b(*this), 0); } + simdjson_inline bool bits_not_set_anywhere() const { return 0 == __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool
bits_not_set_anywhere(simd8 bits) const { + return 0 == __lsx_vpickve2gr_hu(__lsx_vmsknz_b(__lsx_vand_v(*this, bits)), 0); + } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(__lsx_vsrli_b(*this, N)); } + template + simdjson_inline simd8 shl() const { return simd8(__lsx_vslli_b(*this, N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "LSX kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + // Compresses the four 16-byte chunks into output, one 16-bit slice of mask per chunk, + // and returns how far the write cursor advanced (in units of T). + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint16_t mask1 = uint16_t(mask); + uint16_t mask2 = uint16_t(mask >> 16); + uint16_t mask3 = uint16_t(mask >> 32); + uint16_t mask4 = uint16_t(mask >> 48); + // Per 16-bit lane, count the zero bits of mask (the mask is inverted before counting); + // each count is the amount the output cursor advances for that chunk. + __m128i zcnt = __lsx_vpcnt_h(__m128i(v2u64{~mask, 0})); + uint64_t zcnt1 = __lsx_vpickve2gr_hu(zcnt, 0); + uint64_t zcnt2 = __lsx_vpickve2gr_hu(zcnt, 1); + uint64_t zcnt3 = __lsx_vpickve2gr_hu(zcnt, 2); + uint64_t zcnt4 = __lsx_vpickve2gr_hu(zcnt, 3); + uint8_t *voutput = reinterpret_cast(output); + // NOTE(review): there is presumably a threshold below which processing in scalar code is faster; it has not been determined.
+ if (zcnt1) + this->chunks[0].compress(mask1, reinterpret_cast(voutput)); + voutput += zcnt1; + if (zcnt2) + this->chunks[1].compress(mask2, reinterpret_cast(voutput)); + voutput += zcnt2; + if (zcnt3) + this->chunks[2].compress(mask3, reinterpret_cast(voutput)); + voutput += zcnt3; + if (zcnt4) + this->chunks[3].compress(mask4, reinterpret_cast(voutput)); + voutput += zcnt4; + return reinterpret_cast(voutput) - reinterpret_cast(output); + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline uint64_t to_bitmask() const { + __m128i mask1 = __lsx_vmskltz_b(this->chunks[0]); + __m128i mask2 = __lsx_vmskltz_b(this->chunks[1]); + __m128i mask3 = __lsx_vmskltz_b(this->chunks[2]); + __m128i mask4 = __lsx_vmskltz_b(this->chunks[3]); + mask1 = __lsx_vilvl_h(mask2, mask1); + mask2 = __lsx_vilvl_h(mask4, mask3); + return __lsx_vpickve2gr_du(__lsx_vilvl_w(mask2, mask1), 0); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd 
+} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_SIMD_H +/* end file simdjson/lsx/simd.h */ +/* including simdjson/lsx/stringparsing_defs.h: #include "simdjson/lsx/stringparsing_defs.h" */ +/* begin file simdjson/lsx/stringparsing_defs.h */ +#ifndef SIMDJSON_LSX_STRINGPARSING_DEFS_H +#define SIMDJSON_LSX_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on LSX; 
therefore, we + // smash them together into a 64-byte mask and get the bitmask from there. + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_STRINGPARSING_DEFS_H +/* end file simdjson/lsx/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/lsx/begin.h */ +/* including simdjson/generic/amalgamated.h for lsx: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for lsx */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for lsx: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for lsx */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. 
*/ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." 
*/ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word +}; + +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for lsx */ +/* including simdjson/generic/jsoncharutils.h for lsx: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for lsx */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise 
returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. 
+ return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for lsx */ +/* including simdjson/generic/atomparsing.h for lsx: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for lsx */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace lsx { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. 
+// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len 
> 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for lsx */ +/* including simdjson/generic/dom_parser_implementation.h for lsx: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for lsx */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { + +// expectation: sizeof(open_container) = 64/8. 
+struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace lsx +} // namespace 
simdjson + +namespace simdjson { +namespace lsx { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for lsx */ +/* including simdjson/generic/implementation_simdjson_result_base.h for lsx: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for lsx */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation 
skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). 
+ */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. 
+ */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for lsx */ +/* including simdjson/generic/numberparsing.h for lsx: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for lsx */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace lsx { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, 
WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). 
+ // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. 
+ // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. 
+ simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. 
+ // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without + // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product + // is sufficiently accurate, and more computation is not needed. + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. 
+ // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. 
+ if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // 
might overflow, we will handle the overflow later + return true; +} + +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. 
+ + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? 
-exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { + if (parse_float_fallback(src, answer)) { + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! 
+ if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer + // reference to it, it would force it to be stored in memory, preventing the compiler from + // picking it apart and putting into registers. i.e. if we pass it as reference, + // it gets slow. + double d; + error_code error = slow_float_parsing(src, &d); + writer.append_double(d); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. 
+ return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; 
} +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' 
== *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. 
+ // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it does not fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. 
+ if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. 
+ // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. 
+ if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. 
+ if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } + // If the number is negative and valid, it must be a signed integer. + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). 
+ static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. 
+ // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for lsx */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for lsx: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // 
SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} 
+template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for lsx */ +/* end file simdjson/generic/amalgamated.h for lsx */ +/* including simdjson/lsx/end.h: #include "simdjson/lsx/end.h" */ +/* begin file simdjson/lsx/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "lsx" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/lsx/end.h */ + +#endif // SIMDJSON_LSX_H +/* end file simdjson/lsx.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(lasx) +/* including simdjson/lasx.h: #include "simdjson/lasx.h" */ +/* begin file simdjson/lasx.h */ +#ifndef SIMDJSON_LASX_H +#define SIMDJSON_LASX_H + +/* including simdjson/lasx/begin.h: #include "simdjson/lasx/begin.h" */ +/* begin file simdjson/lasx/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "lasx" */ +#define SIMDJSON_IMPLEMENTATION lasx +/* including simdjson/lasx/base.h: #include "simdjson/lasx/base.h" */ +/* begin file simdjson/lasx/base.h */ +#ifndef SIMDJSON_LASX_BASE_H +#define SIMDJSON_LASX_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ 
+ +namespace simdjson { +/** + * Implementation for LASX. + */ +namespace lasx { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_BASE_H +/* end file simdjson/lasx/base.h */ +/* including simdjson/lasx/intrinsics.h: #include "simdjson/lasx/intrinsics.h" */ +/* begin file simdjson/lasx/intrinsics.h */ +#ifndef SIMDJSON_LASX_INTRINSICS_H +#define SIMDJSON_LASX_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for LoongArch ASX"); + +#endif // SIMDJSON_LASX_INTRINSICS_H +/* end file simdjson/lasx/intrinsics.h */ +/* including simdjson/lasx/bitmanipulation.h: #include "simdjson/lasx/bitmanipulation.h" */ +/* begin file simdjson/lasx/bitmanipulation.h */ +#ifndef SIMDJSON_LASX_BITMANIPULATION_H +#define SIMDJSON_LASX_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/bitmask.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. 
+SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { + return __builtin_ctzll(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return __builtin_clzll(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int count_ones(uint64_t input_num) { + return __lasx_xvpickve2gr_w(__lasx_xvpcnt_d(__m256i(v4u64{input_num, 0, 0, 0})), 0); +} + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +} + +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_BITMANIPULATION_H +/* end file simdjson/lasx/bitmanipulation.h */ +/* including simdjson/lasx/bitmask.h: #include "simdjson/lasx/bitmask.h" */ +/* begin file simdjson/lasx/bitmask.h */ +#ifndef SIMDJSON_LASX_BITMASK_H +#define SIMDJSON_LASX_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. 
+// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif +/* end file simdjson/lasx/bitmask.h */ +/* including simdjson/lasx/numberparsing_defs.h: #include "simdjson/lasx/numberparsing_defs.h" */ +/* begin file simdjson/lasx/numberparsing_defs.h */ +#ifndef SIMDJSON_LASX_NUMBERPARSING_DEFS_H +#define SIMDJSON_LASX_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace lasx { +namespace numberparsing { + +// we don't have appropriate instructions, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); + return answer; +} + +} // namespace numberparsing +} // namespace lasx +} // namespace simdjson + 
+#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_LASX_NUMBERPARSING_DEFS_H +/* end file simdjson/lasx/numberparsing_defs.h */ +/* including simdjson/lasx/simd.h: #include "simdjson/lasx/simd.h" */ +/* begin file simdjson/lasx/simd.h */ +#ifndef SIMDJSON_LASX_SIMD_H +#define SIMDJSON_LASX_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m256i value; + + // Zero constructor + simdjson_inline base() : value{__m256i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m256i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m256i&() const { return this->value; } + simdjson_inline operator __m256i&() { return this->value; } + simdjson_inline operator const v32i8&() const { return (v32i8&)this->value; } + simdjson_inline operator v32i8&() { return (v32i8&)this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return __lasx_xvor_v(*this, other); } + simdjson_inline Child operator&(const Child other) const { return __lasx_xvand_v(*this, other); } + simdjson_inline Child operator^(const Child other) const { return __lasx_xvxor_v(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return __lasx_xvandn_v(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& 
operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. + template + struct simd8; + + template> + struct base8: base> { + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m256i _value) : base>(_value) {} + + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return __lasx_xvseq_b(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + __m256i hi = __lasx_xvbsll_v(*this, N); + __m256i lo = __lasx_xvbsrl_v(*this, 16 - N); + __m256i tmp = __lasx_xvbsrl_v(prev_chunk, 16 - N); + lo = __lasx_xvpermi_q(lo, tmp, 0x21); + return __lasx_xvor_v(hi, lo); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return __lasx_xvreplgr2vr_b(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m256i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { + __m256i mask = __lasx_xvmskltz_b(*this); + return (__lasx_xvpickve2gr_w(mask, 4) << 16) | (__lasx_xvpickve2gr_w(mask, 0)); + } + simdjson_inline bool any() const { + __m256i v = __lasx_xvmsknz_b(*this); + return (0 == __lasx_xvpickve2gr_w(v, 0)) && (0 == __lasx_xvpickve2gr_w(v, 4)); + } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { + return __lasx_xvreplgr2vr_b(_value); + } + static simdjson_inline simd8 zero() { return __lasx_xvldi(0); } + static 
simdjson_inline simd8 load(const T values[32]) { + return __lasx_xvld(reinterpret_cast(values), 0); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m256i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[32]) const { + return __lasx_xvst(*this, reinterpret_cast<__m256i *>(dst), 0); + } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return __lasx_xvadd_b(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return __lasx_xvsub_b(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return __lasx_xvshuf_b(lookup_table, lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. 
+ template + simdjson_inline void compress(uint32_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by haswell + // lasx do it in 4 steps, first 8 bytes and then second 8 bytes... + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second significant 8 bits + uint8_t mask3 = uint8_t(mask >> 16); // ... + uint8_t mask4 = uint8_t(mask >> 24); // ... + // next line just loads the 64-bit values thintable_epi8[mask{1,2,3,4}] + // into a 256-bit register. + __m256i shufmask = {int64_t(thintable_epi8[mask1]), int64_t(thintable_epi8[mask2]) + 0x0808080808080808, int64_t(thintable_epi8[mask3]), int64_t(thintable_epi8[mask4]) + 0x0808080808080808}; + // this is the version "nearly pruned" + __m256i pruned = __lasx_xvshuf_b(*this, *this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + int pop2 = BitsSetTable256mul2[mask2]; + int pop3 = BitsSetTable256mul2[mask3]; + + // then load the corresponding mask + __m256i masklo = __lasx_xvldx(reinterpret_cast(reinterpret_cast(pshufb_combine_table)), pop1 * 8); + __m256i maskhi = __lasx_xvldx(reinterpret_cast(reinterpret_cast(pshufb_combine_table)), pop3 * 8); + __m256i compactmask = __lasx_xvpermi_q(maskhi, masklo, 0x20); + __m256i answer = __lasx_xvshuf_b(pruned, pruned, compactmask); + __lasx_xvst(answer, reinterpret_cast(output), 0); + uint64_t value3 = __lasx_xvpickve2gr_du(answer, 2); + uint64_t value4 = __lasx_xvpickve2gr_du(answer, 3); + uint64_t *pos = reinterpret_cast(reinterpret_cast(output) + 16 - (pop1 + pop2) / 2); + pos[0] = value3; + pos[1] = value4; + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L 
replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 + ) : simd8({ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + }) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return __lasx_xvmax_b(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lasx_xvmin_b(*this, other); } + 
simdjson_inline simd8 operator>(const simd8 other) const { return __lasx_xvslt_b(other, *this); } + simdjson_inline simd8 operator<(const simd8 other) const { return __lasx_xvslt_b(*this, other); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 + ) : simd8(__m256i(v32u8{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + })) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return __lasx_xvsadd_bu(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return __lasx_xvssub_bu(*this, other); } + + // Order-specific operations + 
simdjson_inline simd8 max_val(const simd8 other) const { return __lasx_xvmax_bu(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lasx_xvmin_bu(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { + __m256i mask = __lasx_xvmskltz_b(*this); + return (0 == __lasx_xvpickve2gr_w(mask, 0)) && (0 == __lasx_xvpickve2gr_w(mask, 4)); + } + simdjson_inline bool bits_not_set_anywhere() const { + __m256i v = __lasx_xvmsknz_b(*this); + return (0 == __lasx_xvpickve2gr_w(v, 0)) && (0 == __lasx_xvpickve2gr_w(v, 4)); + } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { + __m256i v = __lasx_xvmsknz_b(__lasx_xvand_v(*this, bits)); + return (0 == __lasx_xvpickve2gr_w(v, 0)) && (0 == __lasx_xvpickve2gr_w(v, 4)); + } + 
simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(__lasx_xvsrli_b(*this, N)); } + template + simdjson_inline simd8 shl() const { return simd8(__lasx_xvslli_b(*this, N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 2, "LASX kernel should use two registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint32_t mask1 = uint32_t(mask); + uint32_t mask2 = uint32_t(mask >> 32); + __m256i zcnt = __lasx_xvpcnt_w(__m256i(v4u64{~mask, 0, 0, 0})); + uint64_t zcnt1 = __lasx_xvpickve2gr_wu(zcnt, 0); + uint64_t zcnt2 = __lasx_xvpickve2gr_wu(zcnt, 1); + // There should be a critical value which processes in scaler is faster. 
+ if (zcnt1) + this->chunks[0].compress(mask1, output); + if (zcnt2) + this->chunks[1].compress(mask2, output + zcnt1); + return zcnt1 + zcnt2; + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + } + + simdjson_inline uint64_t to_bitmask() const { + __m256i mask0 = __lasx_xvmskltz_b(this->chunks[0]); + __m256i mask1 = __lasx_xvmskltz_b(this->chunks[1]); + __m256i mask_tmp = __lasx_xvpickve_w(mask0, 4); + __m256i tmp = __lasx_xvpickve_w(mask1, 4); + mask0 = __lasx_xvinsve0_w(mask0, mask1, 1); + mask_tmp = __lasx_xvinsve0_w(mask_tmp, tmp, 1); + return __lasx_xvpickve2gr_du(__lasx_xvpackev_h(mask_tmp, mask0), 0); + } + + simdjson_inline simd8 reduce_or() const { + return this->chunks[0] | this->chunks[1]; + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_SIMD_H +/* end file simdjson/lasx/simd.h */ +/* including simdjson/lasx/stringparsing_defs.h: #include "simdjson/lasx/stringparsing_defs.h" */ +/* begin file simdjson/lasx/stringparsing_defs.h */ +#ifndef SIMDJSON_LASX_STRINGPARSING_DEFS_H +#define SIMDJSON_LASX_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/lasx/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + return { + static_cast((v == '\\').to_bitmask()), // bs_bits + static_cast((v == '"').to_bitmask()), // quote_bits + }; +} + +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_STRINGPARSING_DEFS_H +/* end file simdjson/lasx/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/lasx/begin.h */ +/* including simdjson/generic/amalgamated.h for lasx: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for lasx */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! 
+#endif + +/* including simdjson/generic/base.h for lasx: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for lasx */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */ +/* amalgamation skipped (editor-only): #elif 
SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word +}; + +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for lasx */ +/* including simdjson/generic/jsoncharutils.h for lasx: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for lasx */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise 
+simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... 
+ //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for lasx */ +/* including simdjson/generic/atomparsing.h for lasx: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for lasx */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace lasx { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. 
+simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for lasx */ +/* including simdjson/generic/dom_parser_implementation.h for lasx: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file 
simdjson/generic/dom_parser_implementation.h for lasx */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { + +// expectation: sizeof(open_container) = 64/8. +struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept 
final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace lasx +} // namespace simdjson + +namespace simdjson { +namespace lasx { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace lasx +} // 
namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for lasx */ +/* including simdjson/generic/implementation_simdjson_result_base.h for lasx: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for lasx */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. 
+ */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. 
This function is safe if and only if + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only if + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only if + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for lasx */ +/* including simdjson/generic/numberparsing.h for lasx: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for lasx */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace lasx { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE),
(SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) will be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat."
+#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like.
Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are falling back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximately equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1.
+ const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication.
Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without + // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product + // is sufficiently accurate, and more computation is not needed. + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here we have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ?
-0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 64 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divisible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11} or q>= -4.
+ // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. 
+ + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! 
+ // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! 
+ if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? 
-exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { + if (parse_float_fallback(src, answer)) { + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! 
+ if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer + // reference to it, it would force it to be stored in memory, preventing the compiler from + // picking it apart and putting into registers. i.e. if we pass it as reference, + // it gets slow. + double d; + error_code error = slow_float_parsing(src, &d); + writer.append_double(d); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. 
+ return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; 
} +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' 
== *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. 
+ // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it does not fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. 
+ if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. 
+ // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. 
+ if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. 
+ if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } + // If the number is negative and valid, it must be a signed integer. + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). 
+ static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. 
+ // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for lasx */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for lasx: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // 
SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} 
+template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for lasx */ +/* end file simdjson/generic/amalgamated.h for lasx */ +/* including simdjson/lasx/end.h: #include "simdjson/lasx/end.h" */ +/* begin file simdjson/lasx/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "lasx" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/lasx/end.h */ + +#endif // SIMDJSON_LASX_H +/* end file simdjson/lasx.h */ +#else +#error Unknown SIMDJSON_BUILTIN_IMPLEMENTATION +#endif + +/* undefining SIMDJSON_CONDITIONAL_INCLUDE */ +#undef SIMDJSON_CONDITIONAL_INCLUDE + +#endif // SIMDJSON_BUILTIN_H +/* end file simdjson/builtin.h */ +/* skipped duplicate #include "simdjson/builtin/base.h" */ + +/* including simdjson/generic/ondemand/dependencies.h: #include "simdjson/generic/ondemand/dependencies.h" */ +/* begin file simdjson/generic/ondemand/dependencies.h */ +#ifdef SIMDJSON_CONDITIONAL_INCLUDE +#error simdjson/generic/ondemand/dependencies.h must be included before defining SIMDJSON_CONDITIONAL_INCLUDE! +#endif + +#ifndef SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H +#define SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H + +// Internal headers needed for ondemand generics. 
+// All includes not under simdjson/generic/ondemand must be here! +// Otherwise, amalgamation will fail. +/* skipped duplicate #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ +/* skipped duplicate #include "simdjson/implementation.h" */ +/* skipped duplicate #include "simdjson/padded_string.h" */ +/* skipped duplicate #include "simdjson/padded_string_view.h" */ +/* skipped duplicate #include "simdjson/internal/dom_parser_implementation.h" */ + +#endif // SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H +/* end file simdjson/generic/ondemand/dependencies.h */ + +/* defining SIMDJSON_CONDITIONAL_INCLUDE */ +#define SIMDJSON_CONDITIONAL_INCLUDE + +#if SIMDJSON_BUILTIN_IMPLEMENTATION_IS(arm64) +/* including simdjson/arm64/ondemand.h: #include "simdjson/arm64/ondemand.h" */ +/* begin file simdjson/arm64/ondemand.h */ +#ifndef SIMDJSON_ARM64_ONDEMAND_H +#define SIMDJSON_ARM64_ONDEMAND_H + +/* including simdjson/arm64/begin.h: #include "simdjson/arm64/begin.h" */ +/* begin file simdjson/arm64/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "arm64" */ +#define SIMDJSON_IMPLEMENTATION arm64 +/* including simdjson/arm64/base.h: #include "simdjson/arm64/base.h" */ +/* begin file simdjson/arm64/base.h */ +#ifndef SIMDJSON_ARM64_BASE_H +#define SIMDJSON_ARM64_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Implementation for NEON (ARMv8). 
+ */ +namespace arm64 { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_BASE_H +/* end file simdjson/arm64/base.h */ +/* including simdjson/arm64/intrinsics.h: #include "simdjson/arm64/intrinsics.h" */ +/* begin file simdjson/arm64/intrinsics.h */ +#ifndef SIMDJSON_ARM64_INTRINSICS_H +#define SIMDJSON_ARM64_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +static_assert(sizeof(uint8x16_t) <= simdjson::SIMDJSON_PADDING, "insufficient padding for arm64"); + +#endif // SIMDJSON_ARM64_INTRINSICS_H +/* end file simdjson/arm64/intrinsics.h */ +/* including simdjson/arm64/bitmanipulation.h: #include "simdjson/arm64/bitmanipulation.h" */ +/* begin file simdjson/arm64/bitmanipulation.h */ +#ifndef SIMDJSON_ARM64_BITMANIPULATION_H +#define SIMDJSON_ARM64_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. 
+// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int count_ones(uint64_t input_num) { + return vaddv_u8(vcnt_u8(vcreate_u8(input_num))); +} + + +#if defined(__GNUC__) // catches clang and gcc +/** + * ARM has a fast 64-bit "bit reversal function" that is handy. However, + * it is not generally available as an intrinsic function under Visual + * Studio (though this might be changing). Even under clang/gcc, we + * apparently need to invoke inline assembly. + */ +/* + * We use SIMDJSON_PREFER_REVERSE_BITS as a hint that algorithms that + * work well with bit reversal may use it. 
+ */ +#define SIMDJSON_PREFER_REVERSE_BITS 1 + +/* reverse the bits */ +simdjson_inline uint64_t reverse_bits(uint64_t input_num) { + uint64_t rev_bits; + __asm("rbit %0, %1" : "=r"(rev_bits) : "r"(input_num)); + return rev_bits; +} + +/** + * Flips bit at index 63 - lz. Thus if you have 'leading_zeroes' leading zeroes, + * then this will set to zero the leading bit. It is possible for leading_zeroes to be + * greating or equal to 63 in which case we trigger undefined behavior, but the output + * of such undefined behavior is never used. + **/ +SIMDJSON_NO_SANITIZE_UNDEFINED +simdjson_inline uint64_t zero_leading_bit(uint64_t rev_bits, int leading_zeroes) { + return rev_bits ^ (uint64_t(0x8000000000000000) >> leading_zeroes); +} + +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + *result = value1 + value2; + return *result < value1; +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_BITMANIPULATION_H +/* end file simdjson/arm64/bitmanipulation.h */ +/* including simdjson/arm64/bitmask.h: #include "simdjson/arm64/bitmask.h" */ +/* begin file simdjson/arm64/bitmask.h */ +#ifndef SIMDJSON_ARM64_BITMASK_H +#define SIMDJSON_ARM64_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + ///////////// + // We could do this with PMULL, but it is apparently slow. 
+ // + //#ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension + //return vmull_p64(-1ULL, bitmask); + //#else + // Analysis by @sebpop: + // When diffing the assembly for src/stage1_find_marks.cpp I see that the eors are all spread out + // in between other vector code, so effectively the extra cycles of the sequence do not matter + // because the GPR units are idle otherwise and the critical path is on the FP side. + // Also the PMULL requires two extra fmovs: GPR->FP (3 cycles in N1, 5 cycles in A72 ) + // and FP->GPR (2 cycles on N1 and 5 cycles on A72.) + /////////// + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif +/* end file simdjson/arm64/bitmask.h */ +/* including simdjson/arm64/numberparsing_defs.h: #include "simdjson/arm64/numberparsing_defs.h" */ +/* begin file simdjson/arm64/numberparsing_defs.h */ +#ifndef SIMDJSON_ARM64_NUMBERPARSING_DEFS_H +#define SIMDJSON_ARM64_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +#if SIMDJSON_REGULAR_VISUAL_STUDIO && SIMDJSON_IS_ARM64 +// __umulh requires intrin.h +#include +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO && SIMDJSON_IS_ARM64 + +namespace simdjson { +namespace arm64 { +namespace numberparsing { + +// we don't have SSE, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t 
parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace arm64 +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_ARM64_NUMBERPARSING_DEFS_H +/* end file simdjson/arm64/numberparsing_defs.h */ +/* including simdjson/arm64/simd.h: #include "simdjson/arm64/simd.h" */ +/* begin file simdjson/arm64/simd.h */ +#ifndef SIMDJSON_ARM64_SIMD_H +#define SIMDJSON_ARM64_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace simd { + +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +namespace { +// Start of private section with Visual Studio workaround + + +#ifndef 
simdjson_make_uint8x16_t +#define simdjson_make_uint8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, \ + x13, x14, x15, x16) \ + ([=]() { \ + uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ + x9, x10, x11, x12, x13, x14, x15, x16}; \ + return vld1q_u8(array); \ + }()) +#endif +#ifndef simdjson_make_int8x16_t +#define simdjson_make_int8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, \ + x13, x14, x15, x16) \ + ([=]() { \ + int8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ + x9, x10, x11, x12, x13, x14, x15, x16}; \ + return vld1q_s8(array); \ + }()) +#endif + +#ifndef simdjson_make_uint8x8_t +#define simdjson_make_uint8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + uint8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1_u8(array); \ + }()) +#endif +#ifndef simdjson_make_int8x8_t +#define simdjson_make_int8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + int8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1_s8(array); \ + }()) +#endif +#ifndef simdjson_make_uint16x8_t +#define simdjson_make_uint16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + uint16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1q_u16(array); \ + }()) +#endif +#ifndef simdjson_make_int16x8_t +#define simdjson_make_int16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + int16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1q_s16(array); \ + }()) +#endif + +// End of private section with Visual Studio workaround +} // namespace +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO + + + template + struct simd8; + + // + // Base class of simd8 and simd8, both of which use uint8x16_t internally. 
+ // + template> + struct base_u8 { + // The wrapped NEON register: sixteen unsigned 8-bit lanes. + uint8x16_t value; + // Size of the wrapped register in bytes. + static const int SIZE = sizeof(value); + + // Conversion from/to SIMD register + simdjson_inline base_u8(const uint8x16_t _value) : value(_value) {} + simdjson_inline operator const uint8x16_t&() const { return this->value; } + simdjson_inline operator uint8x16_t&() { return this->value; } + + // Bit operations + simdjson_inline simd8 operator|(const simd8 other) const { return vorrq_u8(*this, other); } + simdjson_inline simd8 operator&(const simd8 other) const { return vandq_u8(*this, other); } + simdjson_inline simd8 operator^(const simd8 other) const { return veorq_u8(*this, other); } + // bit_andnot keeps the bits of *this that are not set in other (vbicq_u8). + simdjson_inline simd8 bit_andnot(const simd8 other) const { return vbicq_u8(*this, other); } + // Bitwise NOT, written as an XOR with 0xFF. + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + // The compound assignments cast this to the simd8 type they return, update it through that pointer and return it. + simdjson_inline simd8& operator|=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline simd8& operator&=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline simd8& operator^=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast ^ other; return *this_cast; } + + // Lane-by-lane equality (vceqq_u8), returned as the Mask type. + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return vceqq_u8(lhs, rhs); } + + // Returns the last N bytes of prev_chunk followed by the first 16 - N bytes of *this (vextq_u8). + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return vextq_u8(prev_chunk, *this, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base_u8 { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + // Broadcasts 0xFF to every lane when _value is true and 0x00 when it is false. + static simdjson_inline simd8 splat(bool _value) { return vmovq_n_u8(uint8_t(-(!!_value))); } + + simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} + // False constructor + simdjson_inline simd8() : simd8(vdupq_n_u8(0)) {} + // Splat constructor + simdjson_inline simd8(bool _value) : simd8(splat(_value)) {} + + //
We return uint32_t instead of uint16_t because that seems to be more efficient for most + // purposes (cutting it down to uint16_t costs performance in some compilers). + // Each lane is masked down to a single bit (0x01 through 0x80 in each half), three pairwise adds + // then fold the sixteen lanes together, and the low 16-bit lane of the folded vector is returned. + simdjson_inline uint32_t to_bitmask() const { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + const uint8x16_t bit_mask = simdjson_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); +#else + const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; +#endif + auto minput = *this & bit_mask; + uint8x16_t tmp = vpaddq_u8(minput, minput); + tmp = vpaddq_u8(tmp, tmp); + tmp = vpaddq_u8(tmp, tmp); + return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); + } + // True when at least one bit of the register is set. + simdjson_inline bool any() const { return vmaxvq_u32(vreinterpretq_u32_u8(*this)) != 0; } + }; + + // Unsigned bytes + template<> + struct simd8: base_u8 { + // splat broadcasts one byte to every lane, zero gives an all-zero register, load reads 16 bytes from values. + static simdjson_inline uint8x16_t splat(uint8_t _value) { return vmovq_n_u8(_value); } + static simdjson_inline uint8x16_t zero() { return vdupq_n_u8(0); } + static simdjson_inline uint8x16_t load(const uint8_t* values) { return vld1q_u8(values); } + + simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} + // Zero constructor + simdjson_inline simd8() : simd8(zero()) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Member-by-member initialization +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(simdjson_make_uint8x16_t( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} +#else + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t
v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(uint8x16_t{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} +#endif + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Store to array + simdjson_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return vqaddq_u8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return vqsubq_u8(*this, other); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_u8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_u8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } + + // Order-specific operations + // The no-argument forms reduce across all sixteen lanes; the one-argument forms work lane by lane. + simdjson_inline uint8_t max_val() const { return vmaxvq_u8(*this); } + simdjson_inline uint8_t min_val() const { return vminvq_u8(*this); } + simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_u8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return vminq_u8(*this, other); } + simdjson_inline simd8 operator<=(const simd8 other) const { return vcleq_u8(*this, other); } + simdjson_inline simd8 operator>=(const simd8 other) const { return vcgeq_u8(*this, other); } +
simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_u8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_u8(*this, other); } + // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. + simdjson_inline simd8 gt_bits(const simd8 other) const { return simd8(*this > other); } + // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. + simdjson_inline simd8 lt_bits(const simd8 other) const { return simd8(*this < other); } + + // Bit-specific operations + // any_bits_set works lane by lane: a lane is all 1's when it shares a set bit with the same lane of bits (vtstq_u8). + simdjson_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); } + simdjson_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); } + // shr and shl shift every lane right or left by N bits. + template + simdjson_inline simd8 shr() const { return vshrq_n_u8(*this, N); } + template + simdjson_inline simd8 shl() const { return vshlq_n_u8(*this, N); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + + + // Copies to 'output' all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint16_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult.
+ template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + uint64x2_t shufmask64 = {thintable_epi8[mask1], thintable_epi8[mask2]}; + uint8x16_t shufmask = vreinterpretq_u8_u64(shufmask64); + // we increment by 0x08 the second half of the mask +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + uint8x16_t inc = simdjson_make_uint8x16_t(0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); +#else + uint8x16_t inc = {0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; +#endif + shufmask = vaddq_u8(shufmask, inc); + // this is the version "nearly pruned" + uint8x16_t pruned = vqtbl1q_u8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + uint8x16_t compactmask = vld1q_u8(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + uint8x16_t answer = vqtbl1q_u8(pruned, compactmask); + vst1q_u8(reinterpret_cast(output), answer); + } + + // Copies all bytes corresponding to a 0 in the low half of the mask (interpreted as a + // bitset) to output1, then those corresponding to a 0 in the high half to output2.
+ template + simdjson_inline void compress_halves(uint16_t mask, L *output1, L *output2) const { + using internal::thintable_epi8; + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + uint8x8_t compactmask1 = vcreate_u8(thintable_epi8[mask1]); + uint8x8_t compactmask2 = vcreate_u8(thintable_epi8[mask2]); + // we increment by 0x08 the second half of the mask +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + uint8x8_t inc = simdjson_make_uint8x8_t(0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); +#else + uint8x8_t inc = {0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; +#endif + compactmask2 = vadd_u8(compactmask2, inc); + // store each result (with the second store possibly overlapping the first) + vst1_u8((uint8_t*)output1, vqtbl1_u8(*this, compactmask1)); + vst1_u8((uint8_t*)output2, vqtbl1_u8(*this, compactmask2)); + } + + // Convenience overload: builds the lookup table from sixteen individual entries with repeat_16(). + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + + // Treats *this as a 16-entry table and the lanes of original as the indices into it (vqtbl1q_u8). + template + simdjson_inline simd8 apply_lookup_16_to(const simd8 original) { + return vqtbl1q_u8(*this, simd8(original)); + } + }; + + // Signed bytes + template<> + struct simd8 { + int8x16_t value; + + static simdjson_inline simd8 splat(int8_t _value) { return vmovq_n_s8(_value); } + static simdjson_inline simd8 zero() { return vdupq_n_s8(0); } + static simdjson_inline simd8 load(const int8_t values[16]) { return vld1q_s8(values); } + + // Conversion from/to SIMD register + simdjson_inline simd8(const int8x16_t _value) : value{_value} {} + simdjson_inline operator const int8x16_t&() const { return
this->value; } + simdjson_inline operator int8x16_t&() { return this->value; } + + // Zero constructor + simdjson_inline simd8() : simd8(zero()) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(simdjson_make_int8x16_t( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} +#else + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(int8x16_t{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} +#endif + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Store to array + simdjson_inline void store(int8_t dst[16]) const { return vst1q_s8(dst, *this); } + + // Explicit conversion to/from unsigned + // + // Under Visual Studio/ARM64 uint8x16_t and int8x16_t are apparently the same type. + // In theory, we could check this occurrence with std::same_as and std::enable_if but it is C++14 + // and relatively ugly and hard to read.
+#ifndef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline explicit simd8(const uint8x16_t other): simd8(vreinterpretq_s8_u8(other)) {} +#endif + simdjson_inline explicit operator simd8() const { return vreinterpretq_u8_s8(this->value); } + + // Math + simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_s8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_s8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_s8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return vminq_s8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_s8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_s8(*this, other); } + simdjson_inline simd8 operator==(const simd8 other) const { return vceqq_s8(*this, other); } + + // Returns the last N bytes of prev_chunk followed by the first 16 - N bytes of *this (vextq_s8). + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return vextq_s8(prev_chunk, *this, 16 - N); + } + + // Perform a lookup assuming no value is larger than 16 + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + + template + simdjson_inline simd8 apply_lookup_16_to(const simd8 original)
{ + return vqtbl1q_s8(*this, simd8(original)); + } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "ARM kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + // Built either from four chunks or by loading four consecutive 16-element groups starting at ptr. + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + // Bitwise OR of the four chunks. + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + + // Writes to output, eight input bytes at a time, the bytes whose mask bit is 0. Each byte of + // popcounts counts the zero bits of one mask byte; the multiplication turns those counts into + // running totals, and the top byte (the overall total) is returned. + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint64_t popcounts = vget_lane_u64(vreinterpret_u64_u8(vcnt_u8(vcreate_u8(~mask))), 0); + // compute the prefix sum of the popcounts of each byte + uint64_t offsets = popcounts * 0x0101010101010101; + this->chunks[0].compress_halves(uint16_t(mask), output, &output[popcounts & 0xFF]); + this->chunks[1].compress_halves(uint16_t(mask >> 16), &output[(offsets >> 8) & 0xFF], &output[(offsets >> 16) & 0xFF]); + this->chunks[2].compress_halves(uint16_t(mask >> 32), &output[(offsets >> 24) & 0xFF], &output[(offsets >> 32) & 0xFF]); + this->chunks[3].compress_halves(uint16_t(mask >> 48), &output[(offsets >> 40) & 0xFF], &output[(offsets >> 48) & 0xFF]); + return offsets >> 56; + } + + simdjson_inline uint64_t to_bitmask() const { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + const uint8x16_t
bit_mask = simdjson_make_uint8x16_t( + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + ); +#else + const uint8x16_t bit_mask = { + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + }; +#endif + // Add each of the elements next to each other, successively, to stuff each 8 byte mask into one. + uint8x16_t sum0 = vpaddq_u8(this->chunks[0] & bit_mask, this->chunks[1] & bit_mask); + uint8x16_t sum1 = vpaddq_u8(this->chunks[2] & bit_mask, this->chunks[3] & bit_mask); + sum0 = vpaddq_u8(sum0, sum1); + sum0 = vpaddq_u8(sum0, sum0); + return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); + } + + // Compares every chunk with a splat of m for equality and returns the results as a bitmask. + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + // Same as eq(), but the comparison is chunk <= splat of m. + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_SIMD_H +/* end file simdjson/arm64/simd.h */ +/* including simdjson/arm64/stringparsing_defs.h: #include "simdjson/arm64/stringparsing_defs.h" */ +/* begin file simdjson/arm64/stringparsing_defs.h */ +#ifndef SIMDJSON_ARM64_STRINGPARSING_DEFS_H +#define SIMDJSON_ARM64_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +
+namespace simdjson { +namespace arm64 { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + // Number of source bytes that copy_and_find() copies and scans per call. + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + // Tests quote_bits against bs_bits - 1, which has every bit below the lowest set backslash bit set + // (and is all ones when bs_bits is 0). + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + // The index helpers apply trailing_zeroes() to the corresponding mask. + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + // Low 32 bits of the combined comparison bitmask: the backslash matches. + uint32_t bs_bits; + // High 32 bits of the combined comparison bitmask: the quote matches. + uint32_t quote_bits; +}; // struct backslash_and_quote + +// Copies two consecutive simd8 vectors from src to dst and records which of the copied bytes are backslashes or quotes. +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on ARM; therefore, we + // smash them together into a 64-byte mask and get the bitmask from there. + // The backslash comparisons end up in the low 32 bits and the quote comparisons in the high 32 bits.
+ uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_STRINGPARSING_DEFS_H +/* end file simdjson/arm64/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/arm64/begin.h */ +/* including simdjson/generic/ondemand/amalgamated.h for arm64: #include "simdjson/generic/ondemand/amalgamated.h" */ +/* begin file simdjson/generic/ondemand/amalgamated.h for arm64 */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H) +#error simdjson/generic/ondemand/dependencies.h must be included before simdjson/generic/ondemand/amalgamated.h! +#endif + +// Stuff other things depend on +/* including simdjson/generic/ondemand/base.h for arm64: #include "simdjson/generic/ondemand/base.h" */ +/* begin file simdjson/generic/ondemand/base.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +/** + * A fast, simple, DOM-like interface that parses JSON as you use it. + * + * Designed for maximum speed and a lower memory profile. + */ +namespace ondemand { + +/** Represents the depth of a JSON value (number of nested arrays/objects).
*/ +using depth_t = int32_t; + +/** @copydoc simdjson::arm64::number_type */ +using number_type = simdjson::arm64::number_type; + +/** @private Position in the JSON buffer indexes */ +using token_position = const uint32_t *; + +/** Forward declarations of the ondemand types. */ +class array; +class array_iterator; +class document; +class document_reference; +class document_stream; +class field; +class json_iterator; +enum class json_type; +struct number; +class object; +class object_iterator; +class parser; +class raw_json_string; +class token_iterator; +class value; +class value_iterator; + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_BASE_H +/* end file simdjson/generic/ondemand/base.h for arm64 */ +/* including simdjson/generic/ondemand/value_iterator.h for arm64: #include "simdjson/generic/ondemand/value_iterator.h" */ +/* begin file simdjson/generic/ondemand/value_iterator.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * Iterates through a single JSON value at a particular depth. + * + * Does not keep track of the type of value: provides methods for objects, arrays and scalars and expects + * the caller to call the right ones. + * + * @private This is not intended for external use.
+ */ +class value_iterator { +protected: + /** The underlying JSON iterator */ + json_iterator *_json_iter{}; + /** The depth of this value */ + depth_t _depth{}; + /** + * The starting token index for this value + */ + token_position _start_position{}; + +public: + simdjson_inline value_iterator() noexcept = default; + + /** + * Denote that we're starting a document. + */ + simdjson_inline void start_document() noexcept; + + /** + * Skips a non-iterated or partially-iterated JSON value, whether it is a scalar, array or object. + * + * Optimized for scalars. + */ + simdjson_warn_unused simdjson_inline error_code skip_child() noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is at the start of the value + */ + simdjson_inline bool at_start() const noexcept; + + /** + * Tell whether the value is open--if the value has not been used, or the array/object is still open. + */ + simdjson_inline bool is_open() const noexcept; + + /** + * Tell whether the value is at an object's first field (just after the {). + */ + simdjson_inline bool at_first_field() const noexcept; + + /** + * Abandon all iteration. + */ + simdjson_inline void abandon() noexcept; + + /** + * Get the child value as a value_iterator. + */ + simdjson_inline value_iterator child_value() const noexcept; + + /** + * Get the depth of this value. + */ + simdjson_inline int32_t depth() const noexcept; + + /** + * Get the JSON type of this value. + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() const noexcept; + + /** + * @addtogroup object Object iteration + * + * Methods to iterate and find object fields. These methods generally *assume* the value is + * actually an object; the caller is responsible for keeping track of that fact. + * + * @{ + */ + + /** + * Start an object iteration. 
+ * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + */ + simdjson_warn_unused simdjson_inline simdjson_result start_object() noexcept; + /** + * Start an object iteration from the root. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_object() noexcept; + /** + * Checks whether an object could be started from the root. May be called by start_root_object. + * + * @returns SUCCESS if it is possible to safely start an object from the root (document level). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_object() noexcept; + /** + * Start an object iteration after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; + /** + * Start an object iteration from the root, after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; + + /** + * Moves to the next field in an object. + * + * Looks for , and }. 
If } is found, the object is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return whether there is another field in the object. + * @error TAPE_ERROR If there is a comma missing between fields. + * @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_field() noexcept; + + /** + * Get the current field's key. + */ + simdjson_warn_unused simdjson_inline simdjson_result field_key() noexcept; + + /** + * Pass the : in the field and move to its value. + */ + simdjson_warn_unused simdjson_inline error_code field_value() noexcept; + + /** + * Find the next field with the given key. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline error_code find_field(const std::string_view key) noexcept; + + /** + * Find the next field with the given key, *without* unescaping. This assumes object order: it + * will not find the field if it was already passed when looking for some *other* field. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). 
+ */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_raw(const std::string_view key) noexcept; + + /** + * Find the field with the given key without regard to order, and *without* unescaping. + * + * This is an unordered object lookup: if the field is not found initially, it will cycle around and scan from the beginning. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept; + + /** @} */ + + /** + * @addtogroup array Array iteration + * Methods to iterate over array elements. These methods generally *assume* the value is actually + * an object; the caller is responsible for keeping track of that fact. + * @{ + */ + + /** + * Check for an opening [ and start an array iteration. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + */ + simdjson_warn_unused simdjson_inline simdjson_result start_array() noexcept; + /** + * Check for an opening [ and start an array iteration while at the root. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_array() noexcept; + /** + * Checks whether an array could be started from the root. May be called by start_root_array. + * + * @returns SUCCESS if it is possible to safely start an array from the root (document level). 
+ * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_array() noexcept; + /** + * Start an array iteration, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; + /** + * Start an array iteration from the root, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; + + /** + * Moves to the next element in an array. + * + * Looks for , and ]. If ] is found, the array is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return Whether there is another element in the array. + * @error TAPE_ERROR If there is a comma missing between elements. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_element() noexcept; + + /** + * Get a child value iterator. 
+ */ + simdjson_warn_unused simdjson_inline value_iterator child() const noexcept; + + /** @} */ + + /** + * @defgroup scalar Scalar values + * @addtogroup scalar + * @{ + */ + + simdjson_warn_unused simdjson_inline simdjson_result get_string(bool allow_replacement) noexcept; + template + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_bool() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_null() noexcept; + simdjson_warn_unused simdjson_inline bool is_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_integer() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + simdjson_warn_unused simdjson_inline simdjson_result get_root_string(bool check_trailing, bool allow_replacement) noexcept; + template + simdjson_warn_unused simdjson_inline error_code get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_wobbly_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result 
get_root_raw_json_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_bool(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline bool is_root_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_integer(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number_type(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; + + simdjson_inline error_code error() const noexcept; + simdjson_inline uint8_t *&string_buf_loc() noexcept; + simdjson_inline const json_iterator &json_iter() const noexcept; + simdjson_inline json_iterator &json_iter() noexcept; + + simdjson_inline void assert_is_valid() const noexcept; + simdjson_inline bool is_valid() const noexcept; + + /** @} */ +protected: + /** + * Restarts an array iteration. + * @returns Whether the array has any elements (returns false for empty). + */ + simdjson_inline simdjson_result reset_array() noexcept; + /** + * Restarts an object iteration. + * @returns Whether the object has any fields (returns false for empty). 
+ */ + simdjson_inline simdjson_result reset_object() noexcept; + /** + * move_at_start(): moves us so that we are pointing at the beginning of + * the container. It updates the index so that at_start() is true and it + * syncs the depth. The user can then create a new container instance. + * + * Usage: used with value::count_elements(). + **/ + simdjson_inline void move_at_start() noexcept; + + /** + * move_at_container_start(): moves us so that we are pointing at the beginning of + * the container so that assert_at_container_start() passes. + * + * Usage: used with reset_array() and reset_object(). + **/ + simdjson_inline void move_at_container_start() noexcept; + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + simdjson_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept; + + simdjson_inline simdjson_result parse_null(const uint8_t *json) const noexcept; + simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; + simdjson_inline const uint8_t *peek_start() const noexcept; + simdjson_inline uint32_t peek_start_length() const noexcept; + simdjson_inline uint32_t peek_root_length() const noexcept; + + /** + * The general idea of the advance_... methods and the peek_* methods + * is that you first peek and check that you have desired type. If you do, + * and only if you do, then you advance. + * + * We used to unconditionally advance. But this made reasoning about our + * current state difficult. + * Suppose you always advance. Look at the 'value' matching the key + * "shadowable" in the following example... + * + * ({"globals":{"a":{"shadowable":[}}}}) + * + * If the user thinks it is a Boolean and asks for it, then we check the '[', + * decide it is not a Boolean, but still move into the next character ('}'). Now + * we are left pointing at '}' right after a '['. And we have not yet reported + * an error, only that we do not have a Boolean. 
+ * + * If, instead, you just stand your ground until it is content that you know, then + * you will only even move beyond the '[' if the user tells you that you have an + * array. So you will be at the '}' character inside the array and, hopefully, you + * will then catch the error because an array cannot start with '}', but the code + * processing Boolean values does not know this. + * + * So the contract is: first call 'peek_...' and then call 'advance_...' only + * if you have determined that it is a type you can handle. + * + * Unfortunately, it makes the code more verbose, longer and maybe more error prone. + */ + + simdjson_inline void advance_scalar(const char *type) noexcept; + simdjson_inline void advance_root_scalar(const char *type) noexcept; + simdjson_inline void advance_non_root_scalar(const char *type) noexcept; + + simdjson_inline const uint8_t *peek_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_root_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; + + + simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; + simdjson_inline error_code end_container() noexcept; + + /** + * Advance to a place expecting a value (increasing depth). + * + * @return The current token (the one left behind). + * @error TAPE_ERROR If the document ended early. + */ + simdjson_inline simdjson_result advance_to_value() noexcept; + + simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; + simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; + + simdjson_inline bool is_at_start() const noexcept; + /** + * is_at_iterator_start() returns true on an array or object after it has just been + * created, whether the instance is empty or not. 
+ * + * Usage: used by array::begin() in debug mode (SIMDJSON_DEVELOPMENT_CHECKS) + */ + simdjson_inline bool is_at_iterator_start() const noexcept; + + /** + * Assuming that we are within an object, this returns true if we + * are pointing at a key. + * + * Usage: the skip_child() method should never be used while we are pointing + * at a key inside an object. + */ + simdjson_inline bool is_at_key() const noexcept; + + inline void assert_at_start() const noexcept; + inline void assert_at_container_start() const noexcept; + inline void assert_at_root() const noexcept; + inline void assert_at_child() const noexcept; + inline void assert_at_next() const noexcept; + inline void assert_at_non_root_start() const noexcept; + + /** Get the starting position of this value */ + simdjson_inline token_position start_position() const noexcept; + + /** @copydoc error_code json_iterator::position() const noexcept; */ + simdjson_inline token_position position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position last_position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position end_position() const noexcept; + /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + friend class document; + friend class object; + friend class array; + friend class value; + friend class field; +}; // value_iterator + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::value_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline 
simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H +/* end file simdjson/generic/ondemand/value_iterator.h for arm64 */ +/* including simdjson/generic/ondemand/value.h for arm64: #include "simdjson/generic/ondemand/value.h" */ +/* begin file simdjson/generic/ondemand/value.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * An ephemeral JSON value returned during iteration. It is only valid for as long as you do + * not access more data in the JSON document. + */ +class value { +public: + /** + * Create a new invalid value. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline value() noexcept = default; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_inline simdjson_result get() noexcept { + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. 
+ static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_inline error_code get(T &out) noexcept; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() noexcept; + + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() noexcept; + + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + + /** + * Cast this JSON value (inside string) to a unsigned integer. + * + * @returns A unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer.
+ */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64() noexcept; + + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + + /** + * Attempts to fill the provided std::string reference with the parsed value of the current string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. 
+ * We recommend you avoid allocating an std::string unless you need to. + * + * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. + */ + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + + /** + * Cast this JSON value to a "wobbly" string. + * + * The string may not be a valid UTF-8 string. + * See https://simonsapin.github.io/wtf-8/ + * + * Important: a value should be consumed once. Calling get_wobbly_string() twice on the same value + * is an error. + * + * @returns A string that is not guaranteed to be valid UTF-8. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_inline simdjson_result get_bool() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null. + * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T.
The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.). + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() noexcept(false); + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed.
+ * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); +#endif + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + * + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * Performance hint: You should only call count_elements() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. 
+ * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method on the object instance. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. 
+ * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field as not there when they are not in order). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
+ */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @return The type of JSON value (json_type::array, json_type::object, json_type::string, + * json_type::number, json_type::boolean, or json_type::null). + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the value is a scalar (string, number, null, Boolean). + * Returns false when it is an array or object. + * + * @returns true if the type is string, number, null, or Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + /** + * Checks whether the value is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; + + /** + * Checks whether the value is a negative number.
+ * + * @returns true if the number if negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the value is an integer number. Note that + * this requires partially parsing the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * Performance note: if you call this function systematically + * before parsing a number, you may have fallen for a performance + * anti-pattern. + * + * @returns true if the number is an integer. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808. + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808. + * get_number_type() is number_type::big_integer for integers that do not fit in 64 bits, + * in which case the digit_count is set to the length of the big integer string. + * Otherwise, get_number_type() has value number_type::floating_point_number. + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number.
An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * For integers that do not fit in 64 bits, the function returns BIGINT_ERROR error code. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + * + * Performance note: this is designed with performance in mind. When + * calling 'get_number()', you scan the number string only once, determining + * efficiently the type and storing it in an efficient manner. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. 
+ * + * The string_view is *not* null-terminated. However, if this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view is guaranteed to be + * a non-space token. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". + * - -1.2e-100 + * - true + * - false + * - null + * + * See also value::raw_json(). + */ + simdjson_inline std::string_view raw_json_token() noexcept; + + /** + * Get a string_view pointing at this value in the JSON document. + * If this element is an array or an object, it consumes the array or the object + * and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document. + * If this element is a scalar (string, number, Boolean, null), it returns what + * raw_json_token() would return. + */ + simdjson_inline simdjson_result raw_json() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + simdjson_inline simdjson_result current_location() noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. 
+ * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. + * + * Calling at_pointer() on non-document instances (e.g., arrays and objects) is not + * standardized (by RFC 6901). We provide some experimental support for JSON pointers + * on non-document instances. Yet it is not the case when calling at_pointer on an array + * or an object instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. 
+ * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that are trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view at_path) noexcept; + + +protected: + /** + * Create a value. + */ + simdjson_inline value(const value_iterator &iter) noexcept; + + /** + * Skip this value, allowing iteration to continue. + */ + simdjson_inline void skip() noexcept; + + /** + * Start a value at the current position. + * + * (It should already be started; this is just a self-documentation method.) + */ + static simdjson_inline value start(const value_iterator &iter) noexcept; + + /** + * Resume a value.
+ */ + static simdjson_inline value resume(const value_iterator &iter) noexcept; + + /** + * Get the object, starting or resuming it as necessary + */ + simdjson_inline simdjson_result start_or_resume_object() noexcept; + + // simdjson_inline void log_value(const char *type) const noexcept; + // simdjson_inline void log_error(const char *message) const noexcept; + + value_iterator iter{}; + + friend class document; + friend class array_iterator; + friend class field; + friend class object; + friend struct simdjson_result; + friend struct simdjson_result; + friend class field; +}; + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::value &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result get_array() noexcept; + simdjson_inline simdjson_result get_object() noexcept; + + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() noexcept; + 
+ template simdjson_inline error_code get(T &out) noexcept; + +#if SIMDJSON_EXCEPTIONS + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator arm64::ondemand::array() noexcept(false); + simdjson_inline operator arm64::ondemand::object() noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator arm64::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
+ */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field as not there when they are not in order). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
+ */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + */ + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + + /** @copydoc simdjson_inline std::string_view value::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + + /** @copydoc simdjson_inline simdjson_result current_location() noexcept */ + simdjson_inline simdjson_result current_location() noexcept; + /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + +} // namespace simdjson + +#endif 
// SIMDJSON_GENERIC_ONDEMAND_VALUE_H +/* end file simdjson/generic/ondemand/value.h for arm64 */ +/* including simdjson/generic/ondemand/logger.h for arm64: #include "simdjson/generic/ondemand/logger.h" */ +/* begin file simdjson/generic/ondemand/logger.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +// Logging should be free unless SIMDJSON_VERBOSE_LOGGING is set. Importantly, it is critical +// that the call to the log functions be side-effect free. Thus, for example, you should not +// create temporary std::string instances. +namespace logger { + +enum class log_level : int32_t { + info = 0, + error = 1 +}; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + +// We do not want these functions to be 'really inlined' since real inlining is +// for performance purposes and if you are using the loggers, you do not care about +// performance (or should not). +static inline void log_headers() noexcept; +// If args are provided, title will be treated as format string +template +static inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +template +static inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... 
args) noexcept; +static inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; + +static inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail="") noexcept; +static inline void log_error(const json_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +static inline void log_event(const value_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const value_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_error(const value_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +} // namespace logger +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_H +/* end file 
simdjson/generic/ondemand/logger.h for arm64 */ +/* including simdjson/generic/ondemand/token_iterator.h for arm64: #include "simdjson/generic/ondemand/token_iterator.h" */ +/* begin file simdjson/generic/ondemand/token_iterator.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * Iterates through JSON tokens (`{` `}` `[` `]` `,` `:` `""` `123` `true` `false` `null`) + * detected by stage 1. + * + * @private This is not intended for external use. + */ +class token_iterator { +public: + /** + * Create a new invalid token_iterator. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline token_iterator() noexcept = default; + simdjson_inline token_iterator(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator &operator=(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator(const token_iterator &other) noexcept = default; + simdjson_inline token_iterator &operator=(const token_iterator &other) noexcept = default; + + /** + * Advance to the next token (returning the current one). + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + /** + * Reports the current offset in bytes from the start of the underlying buffer. 
+ */ + simdjson_inline uint32_t current_offset() const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. + * + */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a root token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token (start of the document). + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; + /** + * Return the current index. + */ + simdjson_inline token_position position() const noexcept; + /** + * Reset to a previously saved index. 
+ */ + simdjson_inline void set_position(token_position target_position) noexcept; + + // NOTE: we don't support a full C++ iterator interface, because we expect people to make + // different calls to advance the iterator based on *their own* state. + + simdjson_inline bool operator==(const token_iterator &other) const noexcept; + simdjson_inline bool operator!=(const token_iterator &other) const noexcept; + simdjson_inline bool operator>(const token_iterator &other) const noexcept; + simdjson_inline bool operator>=(const token_iterator &other) const noexcept; + simdjson_inline bool operator<(const token_iterator &other) const noexcept; + simdjson_inline bool operator<=(const token_iterator &other) const noexcept; + +protected: + simdjson_inline token_iterator(const uint8_t *buf, token_position position) noexcept; + + /** + * Get the index of the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_index(int32_t delta=0) const noexcept; + /** + * Get the index of the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. + * + */ + simdjson_inline uint32_t peek_index(token_position position) const noexcept; + + const uint8_t *buf{}; + token_position _position{}; + + friend class json_iterator; + friend class value_iterator; + friend class object; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... 
args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +}; + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::token_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H +/* end file simdjson/generic/ondemand/token_iterator.h for arm64 */ +/* including simdjson/generic/ondemand/json_iterator.h for arm64: #include "simdjson/generic/ondemand/json_iterator.h" */ +/* begin file simdjson/generic/ondemand/json_iterator.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * Iterates through JSON tokens, keeping track of depth and string buffer. + * + * @private This is not intended for external use. 
+ */ +class json_iterator { +protected: + token_iterator token{}; + ondemand::parser *parser{}; + /** + * Next free location in the string buffer. + * + * Used by raw_json_string::unescape() to have a place to unescape strings to. + */ + uint8_t *_string_buf_loc{}; + /** + * JSON error, if there is one. + * + * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. + * + * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first + * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If + * this is not elided, we should make sure it's at least not using up a register. Failing that, + * we should store it in document so there's only one of them. + */ + error_code error{SUCCESS}; + /** + * Depth of the current token in the JSON. + * + * - 0 = finished with document + * - 1 = document root value (could be [ or {, not yet known) + * - 2 = , or } inside root array/object + * - 3 = key or value inside root array/object. + */ + depth_t _depth{}; + /** + * Beginning of the document indexes. + * Normally we have root == parser->implementation->structural_indexes.get() + * but this may differ, especially in streaming mode (where we have several + * documents); + */ + token_position _root{}; + /** + * Normally, a json_iterator operates over a single document, but in + * some cases, we may have a stream of documents. This attribute is meant + * as meta-data: the json_iterator works the same irrespective of the + * value of this attribute. 
+ */ + bool _streaming{false}; + +public: + simdjson_inline json_iterator() noexcept = default; + simdjson_inline json_iterator(json_iterator &&other) noexcept; + simdjson_inline json_iterator &operator=(json_iterator &&other) noexcept; + simdjson_inline explicit json_iterator(const json_iterator &other) noexcept = default; + simdjson_inline json_iterator &operator=(const json_iterator &other) noexcept = default; + /** + * Skips a JSON value, whether it is a scalar, array or object. + */ + simdjson_warn_unused simdjson_inline error_code skip_child(depth_t parent_depth) noexcept; + + /** + * Tell whether the iterator is still at the start + */ + simdjson_inline bool at_root() const noexcept; + + /** + * Tell whether we should be expected to run in streaming + * mode (iterating over many documents). It is pure metadata + * that does not affect how the iterator works. It is used by + * start_root_array() and start_root_object(). + */ + simdjson_inline bool streaming() const noexcept; + + /** + * Get the root value iterator + */ + simdjson_inline token_position root_position() const noexcept; + /** + * Assert that we are at the document depth (== 1) + */ + simdjson_inline void assert_at_document_depth() const noexcept; + /** + * Assert that we are at the root of the document + */ + simdjson_inline void assert_at_root() const noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is live (has not been moved). + */ + simdjson_inline bool is_alive() const noexcept; + + /** + * Abandon this iterator, setting depth to 0 (as if the document is finished). + */ + simdjson_inline void abandon() noexcept; + + /** + * Advance the current token without modifying depth. + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + + /** + * Returns true if there is a single token in the index (i.e., it is + * a JSON with a scalar value such as a single number). 
+ * + * @return whether there is a single token + */ + simdjson_inline bool is_single_token() const noexcept; + + /** + * Assert that there are at least the given number of tokens left. + * + * Has no effect in release builds. + */ + simdjson_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; + /** + * Assert that the given position addresses an actual token (is within bounds). + * + * Has no effect in release builds. + */ + simdjson_inline void assert_valid_position(token_position position) const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + /** + * Get a pointer to the current location in the input buffer. + * + * This is not null-terminated; it is a view into the JSON. + * + * You may be pointing outside of the input buffer: it is not generally + * safe to dereference this pointer. + */ + simdjson_inline const uint8_t *unsafe_pointer() const noexcept; + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token to retrieve. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ... 
+ */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the current root token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; + /** + * Get the JSON text for the last token in the document. + * + * This is not null-terminated; it is a view into the JSON. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ... + */ + simdjson_inline const uint8_t *peek_last() const noexcept; + + /** + * Ascend one level. + * + * Validates that the depth - 1 == parent_depth. + * + * @param parent_depth the expected parent depth. + */ + simdjson_inline void ascend_to(depth_t parent_depth) noexcept; + + /** + * Descend one level. + * + * Validates that the new depth == child_depth. + * + * @param child_depth the expected child depth. + */ + simdjson_inline void descend_to(depth_t child_depth) noexcept; + simdjson_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; + + /** + * Get current depth. + */ + simdjson_inline depth_t depth() const noexcept; + + /** + * Get current (writeable) location in the string buffer. + */ + simdjson_inline uint8_t *&string_buf_loc() noexcept; + + /** + * Report an unrecoverable error, preventing further iteration. + * + * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. 
+ */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + /** + * Log error, but don't stop iteration. + * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. + */ + simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; + + /** + * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with + * N bytes of capacity. It will return false if N is too small (smaller than max_len) of if it is zero. + * The buffer (tmpbuf) is padded with space characters. + */ + simdjson_warn_unused simdjson_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept; + + simdjson_inline token_position position() const noexcept; + /** + * Write the raw_json_string to the string buffer and return a string_view. + * Each raw_json_string should be unescaped once, or else the string buffer might + * overflow. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, bool allow_replacement) noexcept; + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in) noexcept; + + simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; + + simdjson_inline error_code consume_character(char c) noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + simdjson_inline token_position start_position(depth_t depth) const noexcept; + simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; +#endif + + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() const noexcept; + + /** + * Updates this json iterator so that it is back at the beginning of the document, + * as if it had just been created. 
+ */ + inline void rewind() noexcept; + /** + * This checks whether the {,},[,] are balanced so that the document + * ends with proper zero depth. This requires scanning the whole document + * and it may be expensive. It is expected that it will be rarely called. + * It does not attempt to match { with } and [ with ]. + */ + inline bool balanced() const noexcept; +protected: + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser, bool streaming) noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + /// The last token before the end + simdjson_inline token_position last_position() const noexcept; + /// The token *at* the end. This points at gibberish and should only be used for comparison. + simdjson_inline token_position end_position() const noexcept; + /// The end of the buffer. + simdjson_inline token_position end() const noexcept; + + friend class document; + friend class document_stream; + friend class object; + friend class array; + friend class value; + friend class raw_json_string; + friend class parser; + friend class value_iterator; + friend class field; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... 
args) noexcept; +}; // json_iterator + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::json_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H +/* end file simdjson/generic/ondemand/json_iterator.h for arm64 */ +/* including simdjson/generic/ondemand/json_type.h for arm64: #include "simdjson/generic/ondemand/json_type.h" */ +/* begin file simdjson/generic/ondemand/json_type.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * The type of a JSON value. + */ +enum class json_type { + // Start at 1 to catch uninitialized / default values more easily + array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) + object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) + number, ///< A JSON number ( 1 or -2.3 or 4.5e6 ...) + string, ///< A JSON string ( "a" or "hello world\n" ...) + boolean, ///< A JSON boolean (true or false) + null ///< A JSON null (null) +}; + +/** + * A type representing a JSON number. + * The design of the struct is deliberately straight-forward. 
All + * functions return standard values with no error check. + */ +struct number { + + /** + * return the automatically determined type of + * the number: number_type::floating_point_number, + * number_type::signed_integer or number_type::unsigned_integer. + * + * enum class number_type { + * floating_point_number=1, /// a binary64 number + * signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + * unsigned_integer /// a positive integer larger or equal to 1<<63 + * }; + */ + simdjson_inline ondemand::number_type get_number_type() const noexcept; + /** + * return true if the automatically determined type of + * the number is number_type::unsigned_integer. + */ + simdjson_inline bool is_uint64() const noexcept; + /** + * return the value as a uint64_t, only valid if is_uint64() is true. + */ + simdjson_inline uint64_t get_uint64() const noexcept; + simdjson_inline operator uint64_t() const noexcept; + + /** + * return true if the automatically determined type of + * the number is number_type::signed_integer. + */ + simdjson_inline bool is_int64() const noexcept; + /** + * return the value as a int64_t, only valid if is_int64() is true. + */ + simdjson_inline int64_t get_int64() const noexcept; + simdjson_inline operator int64_t() const noexcept; + + + /** + * return true if the automatically determined type of + * the number is number_type::floating_point_number. + */ + simdjson_inline bool is_double() const noexcept; + /** + * return the value as a double, only valid if is_double() is true. + */ + simdjson_inline double get_double() const noexcept; + simdjson_inline operator double() const noexcept; + + /** + * Convert the number to a double. Though it always succeed, the conversion + * may be lossy if the number cannot be represented exactly. 
+ */ + simdjson_inline double as_double() const noexcept; + + +protected: + /** + * The next block of declaration is designed so that we can call the number parsing + * functions on a number type. They are protected and should never be used outside + * of the core simdjson library. + */ + friend class value_iterator; + template + friend error_code numberparsing::write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer); + template + friend error_code numberparsing::parse_number(const uint8_t *const src, W &writer); + /** Store a signed 64-bit value to the number. */ + simdjson_inline void append_s64(int64_t value) noexcept; + /** Store an unsigned 64-bit value to the number. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + /** Store a double value to the number. */ + simdjson_inline void append_double(double value) noexcept; + /** Specifies that the value is a double, but leave it undefined. */ + simdjson_inline void skip_double() noexcept; + /** + * End of friend declarations. + */ + + /** + * Our attributes are a union type (size = 64 bits) + * followed by a type indicator. + */ + union { + double floating_point_number; + int64_t signed_integer; + uint64_t unsigned_integer; + } payload{0}; + number_type type{number_type::signed_integer}; +}; + +/** + * Write the JSON type to the output stream + * + * @param out The output stream. + * @param type The json_type. + */ +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept; + +#if SIMDJSON_EXCEPTIONS +/** + * Send JSON type to an output stream. + * + * @param out The output stream. + * @param type The json_type. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown). 
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false); +#endif + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::json_type &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H +/* end file simdjson/generic/ondemand/json_type.h for arm64 */ +/* including simdjson/generic/ondemand/raw_json_string.h for arm64: #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * A string escaped per JSON rules, terminated with quote ("). They are used to represent + * unescaped keys inside JSON documents. + * + * (In other words, a pointer to the beginning of a string, just after the start quote, inside a + * JSON file.) + * + * This class is deliberately simplistic and has little functionality. You can + * compare a raw_json_string instance with an unescaped C string, but + * that is nearly all you can do. 
+ * + * The raw_json_string is unescaped. If you wish to write an unescaped version of it to your own + * buffer, you may do so using the parser.unescape(string, buff) method, using an ondemand::parser + * instance. Doing so requires you to have a sufficiently large buffer. + * + * The raw_json_string instances originate typically from field instance which in turn represent + * key-value pairs from object instances. From a field instance, you get the raw_json_string + * instance by calling key(). You can, if you want a more usable string_view instance, call + * the unescaped_key() method on the field instance. You may also create a raw_json_string from + * any other string value, with the value.get_raw_json_string() method. Again, you can get + * a more usable string_view instance by calling get_string(). + * + */ +class raw_json_string { +public: + /** + * Create a new invalid raw_json_string. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline raw_json_string() noexcept = default; + + /** + * Create a new raw_json_string pointing at the given location in the JSON. + * + * The given location must be just *after* the beginning quote (") in the JSON file. + * + * It *must* be terminated by a ", and be a valid JSON string. + */ + simdjson_inline raw_json_string(const uint8_t * _buf) noexcept; + /** + * Get the raw pointer to the beginning of the string in the JSON (just after the "). + * + * It is possible for this function to return a null pointer if the instance + * has outlived its existence. + */ + simdjson_inline const char * raw() const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done) on target.size() characters, + * and if the raw_json_string instance has a quote character at byte index target.size(). + * We never read more than length + 1 bytes in the raw_json_string instance.
+ * If length is smaller than target.size(), this will return false. + * + * The std::string_view instance may contain any characters. However, the caller + * is responsible for setting length so that length bytes may be read in the + * raw_json_string. + * + * Performance: the comparison may be done using memcmp which may be efficient + * for long strings. + */ + simdjson_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The std::string_view instance should not contain unescaped quote characters: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * Performance: the comparison is done byte-by-byte which might be inefficient for + * long strings. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... + * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The provided C string should not contain an unescaped quote character: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... 
+ * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(const char* target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(const char* target) const noexcept; + + /** + * Returns true if target is free from unescaped quote. If target is known at + * compile-time, we might expect the computation to happen at compile time with + * many compilers (not all!). + */ + static simdjson_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; + static simdjson_inline bool is_free_from_unescaped_quote(const char* target) noexcept; + +private: + + + /** + * This will set the inner pointer to zero, effectively making + * this instance unusable. + */ + simdjson_inline void consume() noexcept { buf = nullptr; } + + /** + * Checks whether the inner pointer is non-null and thus usable. + */ + simdjson_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result will be a valid UTF-8. + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + * @param allow_replacement Whether we allow replacement of invalid surrogate pairs. 
+ */ + simdjson_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter, bool allow_replacement) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result may not be a valid UTF-8. https://simonsapin.github.io/wtf-8/ + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + */ + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(json_iterator &iter) const noexcept; + const uint8_t * buf{}; + friend class object; + friend class field; + friend class parser; + friend struct simdjson_result; +}; + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; + +/** + * Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible + * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings. 
+ */ +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept; +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept; + + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::raw_json_string &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private + + simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape(arm64::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(arm64::ondemand::json_iterator &iter) const noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H +/* end file simdjson/generic/ondemand/raw_json_string.h for arm64 */ +/* including simdjson/generic/ondemand/parser.h for arm64: #include "simdjson/generic/ondemand/parser.h" */ +/* begin file simdjson/generic/ondemand/parser.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* 
amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * The default batch size for document_stream instances for this On Demand kernel. + * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * in the future. + */ +static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; +/** + * Some adversary might try to set the batch size to 0 or 1, which might cause problems. + * We set a minimum of 32B since anything else is highly likely to be an error. In practice, + * most users will want a much larger batch size. + * + * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON + * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. + */ +static constexpr size_t MINIMAL_BATCH_SIZE = 32; + +/** + * A JSON fragment iterator. + * + * This holds the actual iterator as well as the buffer for writing strings. + */ +class parser { +public: + /** + * Create a JSON parser. + * + * The new parser will have zero capacity. + */ + inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; + + inline parser(parser &&other) noexcept = default; + simdjson_inline parser(const parser &other) = delete; + simdjson_inline parser &operator=(const parser &other) = delete; + simdjson_inline parser &operator=(parser &&other) noexcept = default; + + /** Deallocate the JSON parser. */ + inline ~parser() noexcept = default; + + /** + * Start iterating an on-demand JSON document. + * + * ondemand::parser parser; + * document doc = parser.iterate(json); + * + * It is expected that the content is a valid UTF-8 file, containing a valid JSON document. + * Otherwise the iterate method may return an error. In particular, the whole input should be + * valid: we do not attempt to tolerate incorrect content either before or after a JSON + * document. 
If there is a UTF-8 BOM, the parser skips it. + * + * ### IMPORTANT: Validate what you use + * + * Calling iterate on an invalid JSON document may not immediately trigger an error. The call to + * iterate does not parse and validate the whole document. + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call iterate() again or destroy the parser. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * @param len The length of the JSON. + * @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). + * + * @return The document, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document.
+ */ + simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_result iterate_allow_incomplete_json(padded_string_view json) & noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const uint8_t *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(padded_string &&json) & noexcept = delete; + + /** + * @private + * + * Start iterating an on-demand JSON document. 
+ * + * ondemand::parser parser; + * json_iterator doc = parser.iterate_raw(json); + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call iterate() again or destroy the parser. + * + * The ondemand::document instance holds the iterator. The document must remain in scope + * while you are accessing instances of ondemand::value, ondemand::object, ondemand::array. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * + * @return The iterator, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document. + */ + simdjson_warn_unused simdjson_result iterate_raw(padded_string_view json) & noexcept; + + + /** + * Parse a buffer containing many JSON documents.
+ * + * auto json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"_padded; + * ondemand::parser parser; + * ondemand::document_stream docs = parser.iterate_many(json); + * for (auto & doc : docs) { + * std::cout << doc["foo"] << std::endl; + * } + * // Prints 1 2 3 + * + * No copy of the input buffer is made. + * + * The function is lazy: it may be that no more than one JSON document at a time is parsed. + * + * The caller is responsible for ensuring that the input string data remains unchanged and is + * not deleted during the loop. + * + * ### Format + * + * The buffer must contain a series of one or more JSON documents, concatenated into a single + * buffer, separated by ASCII whitespace. It effectively parses until it has a fully valid document, + * then starts parsing the next document at that point. (It does this with more parallelism and + * lookahead than you might think, though.) + * + * Documents that consist of an object or array may omit the whitespace between them, concatenating + * with no separator. Documents that consist of a single primitive (i.e. documents that are not + * arrays or objects) MUST be separated with ASCII whitespace. + * + * The characters inside a JSON document, and between JSON documents, must be valid Unicode (UTF-8). + * If there is a UTF-8 BOM, the parser skips it. + * + * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. + * Setting batch_size to excessively large or excessively small values may negatively impact + * performance. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings.
+ * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * + * ### Parser Capacity + * + * If the parser's current capacity is less than batch_size, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param buf The concatenated JSON to parse. + * @param len The length of the concatenated JSON. + * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet + * spot is cache-related: small enough to fit in cache, yet big enough to + * parse as many documents as possible in one tight loop. + * Defaults to DEFAULT_BATCH_SIZE (1MB), which has been a reasonable sweet spot in our tests. + * @param allow_comma_separated (defaults to false) This allows a mode where the documents are + * separated by commas instead of whitespace. It comes with a performance + * penalty because the entire document is indexed at once (and the document must be + * less than 4 GB), and there is no multithreading. In this mode, the batch_size parameter + * is effectively ignored, as it is set to at least the document size. + * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: + * - MEMALLOC if the parser does not have enough capacity and memory allocation fails + * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. + * - other json errors if parsing fails. You should not rely on these errors to always be the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
+ */ + inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) */ + inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) */ + inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) */ + inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + + /** @private We do not want to allow implicit conversion from C string to std::string. */ + simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; + + /** The capacity of this parser (the largest document it can process). */ + simdjson_inline size_t capacity() const noexcept; + /** The maximum capacity of this parser (the largest document it is allowed to process). */ + simdjson_inline size_t max_capacity() const noexcept; + simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; + /** + * The maximum depth of this parser (the most deeply nested objects and arrays it can process). + * This parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired.
+ */ + simdjson_inline size_t max_depth() const noexcept; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * The max_depth parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return The error, if there is one. + */ + simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept; + + #ifdef SIMDJSON_THREADS_ENABLED + /** + * The parser instance can use threads when they are available to speed up some + * operations. It is enabled by default. Changing this attribute will change the + * behavior of the parser for future operations. + */ + bool threaded{true}; + #endif + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result must be valid UTF-8. + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param in The raw_json_string to unescape. + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes.
+ * @param allow_replacement Whether we allow a replacement if the input string contains unmatched surrogate pairs. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement = false) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result may not be valid UTF-8. See https://simonsapin.github.io/wtf-8/ + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param in The raw_json_string to unescape. + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect.
+ */ + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; + +private: + /** @private [for benchmarking access] The implementation to use */ + std::unique_ptr implementation{}; + size_t _capacity{0}; + size_t _max_capacity; + size_t _max_depth{DEFAULT_MAX_DEPTH}; + std::unique_ptr string_buf{}; +#if SIMDJSON_DEVELOPMENT_CHECKS + std::unique_ptr start_positions{}; +#endif + + friend class json_iterator; + friend class document_stream; +}; + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::parser &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H +/* end file simdjson/generic/ondemand/parser.h for arm64 */ + +// All other declarations +/* including simdjson/generic/ondemand/array.h for arm64: #include "simdjson/generic/ondemand/array.h" */ +/* begin file simdjson/generic/ondemand/array.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * A forward-only JSON array. 
+ */ +class array { +public: + /** + * Create a new invalid array. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline array() noexcept = default; + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is rewound to its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an array is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the beginning of the array and checks whether the + * array is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the array is rewound to its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result is_empty() & noexcept; + /** + * Reset the iterator so that we are pointing back at the + * beginning of the array. You should still consume values only once even if you + * can iterate through the array more than once. If you unescape a string + * within the array more than once, you have unsafe code.
Note that rewinding + * an array means that you may need to reparse it anew: it is not a free + * operation. + * + * @returns true if the array contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/0/foo/a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an array + * instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression.
 We only support + * JSONPath queries that are trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Consumes the array and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + + /** + * Get the value at the given index. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than the array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; +protected: + /** + * Go to the end of the array, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + + /** + * Begin array iteration. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + */ + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + /** + * Begin array iteration from the root. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + * @error TAPE_ERROR if there is no closing ] at the end of the document.
+ */ + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + /** + * Begin array iteration. + * + * This version of the method should be called after the initial [ has been verified, and is + * intended for use by switch statements that check the type of a value. + * + * @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array. + */ + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + + /** + * Internal array creation. Call array::start() or array::started() instead of this. + * + * @param iter The iterator. Must either be at the start of the first element with iter.is_alive() + * == true, or past the [] with is_alive() == false if the array is empty. Will be *moved* + * into the resulting array. + */ + simdjson_inline array(const value_iterator &iter) noexcept; + + /** + * Iterator marking current position. + * + * iter.is_alive() == false indicates iteration is complete.
+ */ + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; + friend struct simdjson_result; + friend class array_iterator; +}; + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::array &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + inline simdjson_result count_elements() & noexcept; + inline simdjson_result is_empty() & noexcept; + inline simdjson_result reset() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_H +/* end file simdjson/generic/ondemand/array.h for arm64 */ +/* including simdjson/generic/ondemand/array_iterator.h for arm64: #include "simdjson/generic/ondemand/array_iterator.h" */ +/* begin file simdjson/generic/ondemand/array_iterator.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* 
amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * A forward-only JSON array iterator. + * + * This is an input_iterator, meaning: + * - It is forward-only + * - * must be called exactly once per element. + * - ++ must be called exactly once in between each * (*, ++, *, ++, * ...) + */ +class array_iterator { +public: + /** Create a new, invalid array iterator. */ + simdjson_inline array_iterator() noexcept = default; + + // + // Iterator interface + // + + /** + * Get the current element. + * + * Part of the std::iterator interface. + */ + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + /** + * Check if we are at the end of the JSON array. + * + * Part of the std::iterator interface. + * + * @return true if there are no more elements in the JSON array. + */ + simdjson_inline bool operator==(const array_iterator &) const noexcept; + /** + * Check if there are more elements in the JSON array. + * + * Part of the std::iterator interface. + * + * @return true if there are more elements in the JSON array. + */ + simdjson_inline bool operator!=(const array_iterator &) const noexcept; + /** + * Move to the next element. + * + * Part of the std::iterator interface.
+ */ + simdjson_inline array_iterator &operator++() noexcept; + +private: + value_iterator iter{}; + + simdjson_inline array_iterator(const value_iterator &iter) noexcept; + + friend class array; + friend class value; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::array_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H +/* end file simdjson/generic/ondemand/array_iterator.h for arm64 */ +/* including simdjson/generic/ondemand/document.h for arm64: #include "simdjson/generic/ondemand/document.h" */ +/* begin file simdjson/generic/ondemand/document.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * A JSON document. It holds a json_iterator instance.
+ * + * Used by tokens to get text, and string buffer location. + * + * You must keep the document around during iteration. + */ +class document { +public: + /** + * Create a new invalid document. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline document() noexcept = default; + simdjson_inline document(const document &other) noexcept = delete; // pass your documents by reference, not by copy + simdjson_inline document(document &&other) noexcept = default; + simdjson_inline document &operator=(const document &other) noexcept = delete; + simdjson_inline document &operator=(document &&other) noexcept = default; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() & noexcept; + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() & noexcept; + /** + * Cast this JSON value to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + /** + * Cast this JSON value (inside string) to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64() noexcept; + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer.
+ * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: Calling get_string() twice on the same document is an error. + * + * @param allow_replacement Whether to allow a replacement character for unmatched surrogate pairs. + * @returns A UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + /** + * Attempts to fill the provided std::string reference with the parsed value of the current string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. + * We recommend you avoid allocating an std::string unless you need to. + * + * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. + */ + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + /** + * Cast this JSON value to a string. + * + * The string is not guaranteed to be valid UTF-8.
 See https://simonsapin.github.io/wtf-8/ + * + * Important: Calling get_wobbly_string() twice on the same document is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_inline simdjson_result get_bool() noexcept; + /** + * Cast this JSON value to a value when the document is an object or an array. + * + * You must not have begun iterating through the object or array. When + * SIMDJSON_DEVELOPMENT_CHECKS is set to 1 (which is the case when building in Debug mode + * by default), and you have already begun iterating, + * you will get an OUT_OF_ORDER_ITERATION error. If you have begun iterating, you can use + * rewind() to reset the document to its initial state before calling this method. + * + * @returns A value if the document is a JSON array or object. + * @returns SCALAR_DOCUMENT_AS_VALUE error if the document is a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result get_value() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null.
+ * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_inline simdjson_result get() & noexcept { + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + } + /** @overload template simdjson_result get() & noexcept */ + template simdjson_inline simdjson_result get() && noexcept { + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."
+ " You may also add support for custom types, see our documentation."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value + * + * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_inline error_code get(T &out) & noexcept; + /** @overload template error_code get(T &out) & noexcept */ + template simdjson_inline error_code get(T &out) && noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.) + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() noexcept(false); + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() & noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() & noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns An unsigned 64-bit integer.
+ * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * @returns A UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); + /** + * Cast this JSON value to a value when the document is an object or an array. + * + * You must not have begun iterating through the object or array. When + * SIMDJSON_DEVELOPMENT_CHECKS is defined, and you have already begun iterating, + * you will get an OUT_OF_ORDER_ITERATION error.
 If you have begun iterating, you can use + * rewind() to reset the document to its initial state before calling this method. + * + * @returns A value if the document is a JSON array or object. + * @exception SCALAR_DOCUMENT_AS_VALUE error if the document is a scalar (see is_scalar() function). + */ + simdjson_inline operator value() noexcept(false); +#endif + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is rewound to its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is rewound to its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call.
+ * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) & noexcept; + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to + * a key a single time. Doing object["mykey"].to_string()and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
+ */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field was not there when they are not in order). + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. 
+ * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the document is a scalar (string, number, null, Boolean). + * Returns false when it is an array or object. + * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + + /** + * Checks whether the document is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; + + /** + * Checks whether the document is a negative number.
+ * + * @returns true if the number is negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the document is an integer number. Note that + * this requires partially parsing the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * @returns true if the number is an integer. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808 and no larger than 18446744073709551615. + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808 and greater or equal to -9223372036854775808. + * get_number_type() is number_type::big_integer if we have an integer outside + * of those ranges (either larger than 18446744073709551615 or smaller than -9223372036854775808). + * Otherwise, get_number_type() has value number_type::floating_point_number + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number.
An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. If this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view may be the padded buffer. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". 
+ * - -1.2e-100 + * - true + * - false + * - null + */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + /** + * Reset the iterator inside the document instance so we are pointing back at the + * beginning of the document, as if it had just been created. It invalidates all + * values, objects and arrays that you have created so far (including unescaped strings). + */ + inline void rewind() noexcept; + /** + * Returns debugging information. + */ + inline std::string to_debug_string() noexcept; + /** + * Some unrecoverable error conditions may render the document instance unusable. + * The is_alive() method returns true when the document is still suitable. + */ + inline bool is_alive() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() const noexcept; + + /** + * Returns true if this document has been fully parsed. + * If you have consumed the whole document and at_end() returns + * false, then there may be trailing content. + */ + inline bool at_end() const noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. 
+ * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/\\u00E9") == 123 + * doc.at_pointer((const char*)u8"/\u00E9") returns an error (NO_SUCH_FIELD) + * + * Note that at_pointer() automatically calls rewind between each call. Thus + * all values, objects and arrays that you have created so far (including unescaped strings) + * are invalidated. After calling at_pointer, you need to consume the result: string values + * should be stored in your own variables, arrays should be decoded and stored in your own array-like + * structures and so forth. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. 
We only support + * JSONPath queries that are trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_path(".\\u00E9") == 123 + * doc.at_path((const char*)u8".\u00E9") returns an error (NO_SUCH_FIELD) + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Consumes the document and returns a string_view instance corresponding to the + * document as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; +protected: + /** + * Consumes the document.
+ */ + simdjson_inline error_code consume() noexcept; + + simdjson_inline document(ondemand::json_iterator &&iter) noexcept; + simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; + + simdjson_inline value_iterator resume_value_iterator() noexcept; + simdjson_inline value_iterator get_root_value_iterator() noexcept; + simdjson_inline simdjson_result start_or_resume_object() noexcept; + static simdjson_inline document start(ondemand::json_iterator &&iter) noexcept; + + // + // Fields + // + json_iterator iter{}; ///< Current position in the document + static constexpr depth_t DOCUMENT_DEPTH = 0; ///< document depth is always 0 + + friend class array_iterator; + friend class value; + friend class ondemand::parser; + friend class object; + friend class array; + friend class field; + friend class token; + friend class document_stream; + friend class document_reference; +}; + + +/** + * A document_reference is a thin wrapper around a document reference instance. + */ +class document_reference { +public: + simdjson_inline document_reference() noexcept; + simdjson_inline document_reference(document &d) noexcept; + simdjson_inline document_reference(const document_reference &other) noexcept = default; + simdjson_inline document_reference& operator=(const document_reference &other) noexcept = default; + simdjson_inline void rewind() noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code 
get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + + simdjson_inline simdjson_result is_null() noexcept; + template simdjson_inline simdjson_result get() & noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + simdjson_inline operator document&() const noexcept; +#if SIMDJSON_EXCEPTIONS + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator array() & noexcept(false); + simdjson_inline operator object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + + simdjson_inline simdjson_result type() noexcept; + simdjson_inline 
simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + +private: + document *doc{nullptr}; +}; +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::document &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() 
noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() & noexcept; + template simdjson_inline simdjson_result get() && noexcept; + + template simdjson_inline error_code get(T &out) & noexcept; + template simdjson_inline error_code get(T &out) && noexcept; +#if SIMDJSON_EXCEPTIONS + template ::value == false>::type> + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator arm64::ondemand::array() & noexcept(false); + simdjson_inline operator arm64::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator arm64::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator arm64::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; 
+ simdjson_inline simdjson_result is_string() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool at_end() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + + +} // namespace simdjson + + + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::document_reference value, error_code error) noexcept; + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result 
get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; +#if SIMDJSON_EXCEPTIONS + template ::value == false>::type> + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator arm64::ondemand::array() & noexcept(false); + simdjson_inline operator arm64::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator arm64::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator arm64::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() 
noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H +/* end file simdjson/generic/ondemand/document.h for arm64 */ +/* including simdjson/generic/ondemand/document_stream.h for arm64: #include "simdjson/generic/ondemand/document_stream.h" */ +/* begin file simdjson/generic/ondemand/document_stream.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#ifdef SIMDJSON_THREADS_ENABLED +#include +#include +#include +#endif + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED +/** @private Custom worker class **/ +struct stage1_worker { + stage1_worker() noexcept = default; + stage1_worker(const stage1_worker&) = delete; + stage1_worker(stage1_worker&&) = delete; + stage1_worker operator=(const stage1_worker&) = delete; + ~stage1_worker(); + /** + * We only start the thread when it is needed, not at 
object construction, this may throw. + * You should only call this once. + **/ + void start_thread(); + /** + * Start a stage 1 job. You should first call 'run', then 'finish'. + * You must call start_thread once before. + */ + void run(document_stream * ds, parser * stage1, size_t next_batch_start); + /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ + void finish(); + +private: + + /** + * Normally, we would never stop the thread. But we do in the destructor. + * This function is only safe assuming that you are not waiting for results. You + * should have called run, then finish, and be done. + **/ + void stop_thread(); + + std::thread thread{}; + /** These three variables define the work done by the thread. **/ + ondemand::parser * stage1_thread_parser{}; + size_t _next_batch_start{}; + document_stream * owner{}; + /** + * We have two state variables. This could be streamlined to one variable in the future but + * we use two for clarity. + */ + bool has_work{false}; + bool can_work{true}; + + /** + * We lock using a mutex. + */ + std::mutex locking_mutex{}; + std::condition_variable cond_var{}; + + friend class document_stream; +}; +#endif // SIMDJSON_THREADS_ENABLED + +/** + * A forward-only stream of documents. + * + * Produced by parser::iterate_many. + * + */ +class document_stream { +public: + /** + * Construct an uninitialized document_stream. + * + * ```c++ + * document_stream docs; + * auto error = parser.iterate_many(json).get(docs); + * ``` + */ + simdjson_inline document_stream() noexcept; + /** Move one document_stream to another. */ + simdjson_inline document_stream(document_stream &&other) noexcept = default; + /** Move one document_stream to another. */ + simdjson_inline document_stream &operator=(document_stream &&other) noexcept = default; + + simdjson_inline ~document_stream() noexcept; + + /** + * Returns the input size in bytes. 
+ */ + inline size_t size_in_bytes() const noexcept; + + /** + * After iterating through the stream, this method + * returns the number of bytes that were not parsed at the end + * of the stream. If truncated_bytes() differs from zero, + * then the input was truncated maybe because incomplete JSON + * documents were found at the end of the stream. You + * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). + * + * You should only call truncated_bytes() after streaming through all + * documents, like so: + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto & doc : stream) { + * // do something with doc + * } + * size_t truncated = stream.truncated_bytes(); + * + */ + inline size_t truncated_bytes() const noexcept; + + class iterator { + public: + using value_type = simdjson_result; + using reference = simdjson_result; + using pointer = void; + using difference_type = std::ptrdiff_t; + using iterator_category = std::input_iterator_tag; + + /** + * Default constructor. + */ + simdjson_inline iterator() noexcept; + /** + * Get the current document (or error). + */ + simdjson_inline reference operator*() noexcept; + /** + * Advance to the next document (prefix). + */ + inline iterator& operator++() noexcept; + /** + * Check if we're at the end yet. + * @param other the end iterator to compare to. + */ + simdjson_inline bool operator!=(const iterator &other) const noexcept; + /** + * @private + * + * Gives the current index in the input document in bytes. + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * auto doc = *i; + * size_t index = i.current_index(); + * } + * + * This function (current_index()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier.
+ */ + simdjson_inline size_t current_index() const noexcept; + + /** + * @private + * + * Gives a view of the current document at the current position. + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * std::string_view v = i.source(); + * } + * + * The returned string_view instance is simply a map to the (unparsed) + * source string: it may thus include white-space characters and all manner + * of padding. + * + * This function (source()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + * + */ + simdjson_inline std::string_view source() const noexcept; + + /** + * Returns error of the stream (if any). + */ + inline error_code error() const noexcept; + + private: + simdjson_inline iterator(document_stream *s, bool finished) noexcept; + /** The document_stream we're iterating through. */ + document_stream* stream; + /** Whether we're finished or not. */ + bool finished; + + friend class document; + friend class document_stream; + friend class json_iterator; + }; + + /** + * Start iterating the documents in the stream. + */ + simdjson_inline iterator begin() noexcept; + /** + * The end of the stream, for iterator comparison purposes. + */ + simdjson_inline iterator end() noexcept; + +private: + + document_stream &operator=(const document_stream &) = delete; // Disallow copying + document_stream(const document_stream &other) = delete; // Disallow copying + + /** + * Construct a document_stream. Does not allocate or parse anything until the iterator is + * used. 
+ * + * @param parser is a reference to the parser instance used to generate this document_stream + * @param buf is the raw byte buffer we need to process + * @param len is the length of the raw byte buffer in bytes + * @param batch_size is the size of the windows (must be strictly greater or equal to the largest JSON document) + */ + simdjson_inline document_stream( + ondemand::parser &parser, + const uint8_t *buf, + size_t len, + size_t batch_size, + bool allow_comma_separated + ) noexcept; + + /** + * Parse the first document in the buffer. Used by begin(), to handle allocation and + * initialization. + */ + inline void start() noexcept; + + /** + * Parse the next document found in the buffer previously given to document_stream. + * + * The content should be a valid JSON document encoded as UTF-8. If there is a + * UTF-8 BOM, the parser skips it. + * + * You do NOT need to pre-allocate a parser. This function takes care of + * pre-allocating a capacity defined by the batch_size defined when creating the + * document_stream object. + * + * The function returns simdjson::EMPTY if there is no more data to be parsed. + * + * The function returns simdjson::SUCCESS (as integer = 0) in case of success + * and indicates that the buffer has successfully been parsed to the end. + * Every document it contained has been parsed without error. + * + * The function returns an error code from simdjson/simdjson.h in case of failure + * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth + * (the simdjson::error_message function converts these error codes into a string). + * + * You can also check validity by calling parser.is_valid(). The same parser can + * and should be reused for the other documents in the buffer. + * + * NOTE(review): next() is declared void, so the codes described above are presumably + * recorded in the stream's error state rather than returned -- confirm against the implementation. + */ + inline void next() noexcept; + + /** Move the json_iterator of the document to the location of the next document in the stream. */ + inline void next_document() noexcept; + + /** Get the next document index.
*/ + inline size_t next_batch_start() const noexcept; + + /** Pass the next batch through stage 1 with the given parser. */ + inline error_code run_stage1(ondemand::parser &p, size_t batch_start) noexcept; + + // Fields + ondemand::parser *parser; + const uint8_t *buf; + size_t len; + size_t batch_size; + bool allow_comma_separated; + /** + * We are going to use just one document instance. The document owns + * the json_iterator. It implies that we only ever pass a reference + * to the document to the users. + */ + document doc{}; + /** The error (or lack thereof) from the current document. */ + error_code error; + size_t batch_start{0}; + size_t doc_index{}; + + #ifdef SIMDJSON_THREADS_ENABLED + /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ + bool use_thread; + + inline void load_from_stage1_thread() noexcept; + + /** Start a thread to run stage 1 on the next batch. */ + inline void start_stage1_thread() noexcept; + + /** Wait for the stage 1 thread to finish and capture the results. */ + inline void finish_stage1_thread() noexcept; + + /** The error returned from the stage 1 thread. */ + error_code stage1_thread_error{UNINITIALIZED}; + /** The thread used to run stage 1 against the next batch in the background. */ + std::unique_ptr worker{new(std::nothrow) stage1_worker()}; + /** + * The parser used to run stage 1 in the background. Will be swapped + * with the regular parser when finished. 
+ */ + ondemand::parser stage1_thread_parser{}; + + friend struct stage1_worker; + #endif // SIMDJSON_THREADS_ENABLED + + friend class parser; + friend class document; + friend class json_iterator; + friend struct simdjson_result; + friend struct internal::simdjson_result_base; +}; // document_stream + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::document_stream &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H +/* end file simdjson/generic/ondemand/document_stream.h for arm64 */ +/* including simdjson/generic/ondemand/field.h for arm64: #include "simdjson/generic/ondemand/field.h" */ +/* begin file simdjson/generic/ondemand/field.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * A JSON field (key/value pair) in an object. + * + * Returned from object iteration. + * + * Extends from std::pair so you can use C++ algorithms that rely on pairs. 
+ */ +class field : public std::pair { +public: + /** + * Create a new invalid field. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline field() noexcept; + + /** + * Get the key as a string_view (for higher speed, consider key(), which returns the raw key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + /** + * Get the key as a raw_json_string. Can be used for direct comparison with + * an unescaped C string: e.g., key() == "test". + */ + simdjson_inline raw_json_string key() const noexcept; + /** + * Get the unprocessed key as a string_view. This includes the quotes and may include + * some spaces after the last quote. + */ + simdjson_inline std::string_view key_raw_json_token() const noexcept; + /** + * Get the key as a string_view. This does not include the quotes and + * the string is the unprocessed key, so it may contain escape characters + * (e.g., \uXXXX or \n). Use unescaped_key() to get the unescaped key. + */ + simdjson_inline std::string_view escaped_key() const noexcept; + /** + * Get the field value.
+ */ + simdjson_inline ondemand::value &value() & noexcept; + /** + * @overload ondemand::value &ondemand::value() & noexcept + */ + simdjson_inline ondemand::value value() && noexcept; + +protected: + simdjson_inline field(raw_json_string key, ondemand::value &&value) noexcept; + static simdjson_inline simdjson_result start(value_iterator &parent_iter) noexcept; + static simdjson_inline simdjson_result start(const value_iterator &parent_iter, raw_json_string key) noexcept; + friend struct simdjson_result; + friend class object_iterator; +}; + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::field &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result key() noexcept; + simdjson_inline simdjson_result key_raw_json_token() noexcept; + simdjson_inline simdjson_result escaped_key() noexcept; + simdjson_inline simdjson_result value() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_H +/* end file simdjson/generic/ondemand/field.h for arm64 */ +/* including simdjson/generic/ondemand/object.h for arm64: #include "simdjson/generic/ondemand/object.h" */ +/* begin file simdjson/generic/ondemand/object.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * A forward-only JSON object field iterator. + */ +class object { +public: + /** + * Create a new invalid object. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline object() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. 
+ * + * You are expected to access keys only once. You should access the value corresponding to a + * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. + * + * If you expect to have keys with escape characters, please review our documentation. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field was not there when they are not in order). + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. 
The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. + * + * If you expect to have keys with escape characters, please review our documentation. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. 
+ * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an object + * instance: there is no rewind and no invalidation. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that are trivially convertible to JSON Pointer queries: key + * names and array indices.
+ * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Reset the iterator so that we are pointing back at the + * beginning of the object. You should still consume values only once even if you + * can iterate through the object more than once. If you unescape a string within + * the object more than once, you have unsafe code. Note that rewinding an object + * means that you may need to reparse it anew: it is not a free operation. + * + * @returns true if the object contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * This method scans the beginning of the object and checks whether the + * object is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + inline simdjson_result is_empty() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. 
+ * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Consumes the object and returns a string_view instance corresponding to the + * object as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + +protected: + /** + * Go to the end of the object, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + static simdjson_inline object resume(const value_iterator &iter) noexcept; + simdjson_inline object(const value_iterator &iter) noexcept; + + simdjson_warn_unused simdjson_inline error_code find_field_raw(const std::string_view key) noexcept; + + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::object &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + 
simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + + inline simdjson_result reset() noexcept; + inline simdjson_result is_empty() noexcept; + inline simdjson_result count_fields() & noexcept; + inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_H +/* end file simdjson/generic/ondemand/object.h for arm64 */ +/* including simdjson/generic/ondemand/object_iterator.h for arm64: #include "simdjson/generic/ondemand/object_iterator.h" */ +/* begin file simdjson/generic/ondemand/object_iterator.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +class object_iterator { +public: + /** + * Create a new invalid object_iterator. + * + * Exists so you can declare a variable and later assign to it before use. 
+ */ + simdjson_inline object_iterator() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result operator*() noexcept; + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const object_iterator &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. + simdjson_inline bool operator!=(const object_iterator &) const noexcept; + // Checks for '}' and ',' + simdjson_inline object_iterator &operator++() noexcept; + +private: + /** + * The underlying JSON iterator. + * + * PERF NOTE: expected to be elided in favor of the parent document: this is set when the object + * is first used, and never changes afterwards. + */ + value_iterator iter{}; + + simdjson_inline object_iterator(const value_iterator &iter) noexcept; + friend struct simdjson_result; + friend class object; +}; + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::object_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth.
+ simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + // Checks for '}' and ',' + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H +/* end file simdjson/generic/ondemand/object_iterator.h for arm64 */ +/* including simdjson/generic/ondemand/serialization.h for arm64: #include "simdjson/generic/ondemand/serialization.h" */ +/* begin file simdjson/generic/ondemand/serialization.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Create a string-view instance out of a document instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(arm64::ondemand::document& x) noexcept; +/** + * Create a string-view instance out of a value instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. The value must + * not have been accessed previously. It does not + * validate the content. + */ +inline simdjson_result to_json_string(arm64::ondemand::value& x) noexcept; +/** + * Create a string-view instance out of an object instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(arm64::ondemand::object& x) noexcept; +/** + * Create a string-view instance out of an array instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again.
It does not + * validate the content. + */ +inline simdjson_result to_json_string(arm64::ondemand::array& x) noexcept; +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +} // namespace simdjson + +/** + * We want to support argument-dependent lookup (ADL). + * Hence we should define operator<< in the namespace + * where the argument (here value, object, etc.) resides. + * Credit: @madhur4127 + * See https://github.com/simdjson/simdjson/issues/1768 + */ +namespace simdjson { namespace arm64 { namespace ondemand { + +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The element. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::value x); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::array value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. 
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::document& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::document_reference& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The object. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::object value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +}}} // namespace simdjson::arm64::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H +/* end file simdjson/generic/ondemand/serialization.h for arm64 */ + +// Inline definitions +/* including simdjson/generic/ondemand/array-inl.h for arm64: #include "simdjson/generic/ondemand/array-inl.h" */ +/* begin file simdjson/generic/ondemand/array-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): 
#include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +// +// ## Live States +// +// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the array is first found and the iterator is just past the `[`. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the `,` before the next value (or `]`). In this state, +// depth == iter->depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter->depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the array iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an +// array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter->depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter->depth == depth, and at_start == false. +// +// ## Terminal State +// +// The terminal state has iter->depth < depth. at_start is always false.
+// +// - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this +// by decrementing depth. In this state, iter->depth < depth, at_start == false, and +// error == SUCCESS. +// + +simdjson_inline array::array(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { + // We don't need to know if the array is empty to start iteration, but we do want to know if there + // is an error--thus `simdjson_unused`. + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_root_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { + bool has_value; + SIMDJSON_TRY(iter.started_array().get(has_value)); + return array(iter); +} + +simdjson_inline simdjson_result array::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return array_iterator(iter); +} +simdjson_inline simdjson_result array::end() noexcept { + return array_iterator(iter); +} +simdjson_inline error_code array::consume() noexcept { + auto error = iter.json_iter().skip_child(iter.depth()-1); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result array::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. 
+ const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline simdjson_result array::count_elements() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the array after counting the number of elements. + iter.reset_array(); + return count; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline simdjson_result array::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_array().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +inline simdjson_result array::reset() & noexcept { + return iter.reset_array(); +} + +inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + // - means "the append position" or "the element after the end of the array" + // We don't support this, because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. 
If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + // Get the child + auto child = at(array_index); + // If there is an error, it ends here + if(child.error()) { + return child; + } + + // If there is a /, we're not done yet, call recursively. + if (i < json_pointer.length()) { + child = child.at_pointer(json_pointer.substr(i)); + } + return child; +} + +inline std::string json_path_to_pointer_conversion(std::string_view json_path) { + if (json_path.empty() || (json_path.front() != '.' && + json_path.front() != '[')) { + return "-1"; // This is just a sentinel value, the caller should check for this and return an error. + } + + std::string result; + // Reserve space to reduce allocations, adjusting for potential increases due + // to escaping. + result.reserve(json_path.size() * 2); + + size_t i = 0; + + while (i < json_path.length()) { + if (json_path[i] == '.') { + result += '/'; + } else if (json_path[i] == '[') { + result += '/'; + ++i; // Move past the '[' + while (i < json_path.length() && json_path[i] != ']') { + if (json_path[i] == '~') { + result += "~0"; + } else if (json_path[i] == '/') { + result += "~1"; + } else { + result += json_path[i]; + } + ++i; + } + if (i == json_path.length() || json_path[i] != ']') { + return "-1"; // Using sentinel value that will be handled as an error by the caller. 
+ } + } else { + if (json_path[i] == '~') { + result += "~0"; + } else if (json_path[i] == '/') { + result += "~1"; + } else { + result += json_path[i]; + } + } + ++i; + } + + return result; +} + +inline simdjson_result array::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { return INVALID_JSON_POINTER; } + return at_pointer(json_pointer); +} + +simdjson_inline simdjson_result array::at(size_t index) noexcept { + size_t i = 0; + for (auto value : *this) { + if (i == index) { return value; } + i++; + } + return INDEX_OUT_OF_BOUNDS; +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + arm64::ondemand::array &&value +) noexcept + : implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept + : implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { + if (error()) { return error(); } + return first.is_empty(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) 
noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H +/* end file simdjson/generic/ondemand/array-inl.h for arm64 */ +/* including simdjson/generic/ondemand/array_iterator-inl.h for arm64: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/array_iterator-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result array_iterator::operator*() noexcept { + if (iter.error()) { iter.abandon(); return iter.error(); } + return value(iter.child()); +} +simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { + return iter.is_open(); +} +simdjson_inline array_iterator &array_iterator::operator++() noexcept { + error_code error; + // PERF NOTE this is a safety rail ... 
users should exit loops as soon as they receive an error, so we'll never get here. + // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. + if (( error = iter.error() )) { return *this; } + if (( error = iter.skip_child() )) { return *this; } + if (( error = iter.has_next_element().error() )) { return *this; } + return *this; +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + arm64::ondemand::array_iterator &&value +) noexcept + : arm64::implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : arm64::implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++(first); + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/array_iterator-inl.h for arm64 */ +/* including simdjson/generic/ondemand/document-inl.h for arm64: #include "simdjson/generic/ondemand/document-inl.h" */ +/* begin file simdjson/generic/ondemand/document-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H + +/* amalgamation skipped (editor-only): #ifndef 
SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept + : iter{std::forward(_iter)} +{ + logger::log_start_value(iter, "document"); +} + +simdjson_inline document document::start(json_iterator &&iter) noexcept { + return document(std::forward(iter)); +} + +inline void document::rewind() noexcept { + iter.rewind(); +} + +inline std::string document::to_debug_string() noexcept { + return iter.to_string(); +} + +inline simdjson_result document::current_location() const noexcept { + return iter.current_location(); +} + +inline 
int32_t document::current_depth() const noexcept { + return iter.depth(); +} + +inline bool document::at_end() const noexcept { + return iter.at_end(); +} + + +inline bool document::is_alive() noexcept { + return iter.is_alive(); +} +simdjson_inline value_iterator document::resume_value_iterator() noexcept { + return value_iterator(&iter, 1, iter.root_position()); +} +simdjson_inline value_iterator document::get_root_value_iterator() noexcept { + return resume_value_iterator(); +} +simdjson_inline simdjson_result document::start_or_resume_object() noexcept { + if (iter.at_root()) { + return get_object(); + } else { + return object::resume(resume_value_iterator()); + } +} +simdjson_inline simdjson_result document::get_value() noexcept { + // Make sure we start any arrays or objects before returning, so that start_root_() + // gets called. + + // It is the convention throughout the code that the macro `SIMDJSON_DEVELOPMENT_CHECKS` determines whether + // we check for OUT_OF_ORDER_ITERATION. Proper on::demand code should never trigger this error. +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.at_root()) { return OUT_OF_ORDER_ITERATION; } +#endif + // assert_at_root() serves two purposes: in Debug mode, whether or not + // SIMDJSON_DEVELOPMENT_CHECKS is set or not, it checks that we are at the root of + // the document (this will typically be redundant). In release mode, it generates + // SIMDJSON_ASSUME statements to allow the compiler to make assumptions. + iter.assert_at_root(); + switch (*iter.peek()) { + case '[': { + // The following lines check that the document ends with ]. + auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_array(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + case '{': { + // The following lines would check that the document ends with }. 
+ auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_object(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + default: + // Unfortunately, scalar documents are a special case in simdjson and they cannot + // be safely converted to value instances. + return SCALAR_DOCUMENT_AS_VALUE; + } +} +simdjson_inline simdjson_result document::get_array() & noexcept { + auto value = get_root_value_iterator(); + return array::start_root(value); +} +simdjson_inline simdjson_result document::get_object() & noexcept { + auto value = get_root_value_iterator(); + return object::start_root(value); +} + +/** + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. We want to disallow trailing + * content. + * Thus, in several implementations below, we pass a 'true' parameter value to + * a get_root_value_iterator() method: this indicates that we disallow trailing content. 
+ */ + +simdjson_inline simdjson_result document::get_uint64() noexcept { + return get_root_value_iterator().get_root_uint64(true); +} +simdjson_inline simdjson_result document::get_uint64_in_string() noexcept { + return get_root_value_iterator().get_root_uint64_in_string(true); +} +simdjson_inline simdjson_result document::get_int64() noexcept { + return get_root_value_iterator().get_root_int64(true); +} +simdjson_inline simdjson_result document::get_int64_in_string() noexcept { + return get_root_value_iterator().get_root_int64_in_string(true); +} +simdjson_inline simdjson_result document::get_double() noexcept { + return get_root_value_iterator().get_root_double(true); +} +simdjson_inline simdjson_result document::get_double_in_string() noexcept { + return get_root_value_iterator().get_root_double_in_string(true); +} +simdjson_inline simdjson_result document::get_string(bool allow_replacement) noexcept { + return get_root_value_iterator().get_root_string(true, allow_replacement); +} +template +simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { + return get_root_value_iterator().get_root_string(receiver, true, allow_replacement); +} +simdjson_inline simdjson_result document::get_wobbly_string() noexcept { + return get_root_value_iterator().get_root_wobbly_string(true); +} +simdjson_inline simdjson_result document::get_raw_json_string() noexcept { + return get_root_value_iterator().get_root_raw_json_string(true); +} +simdjson_inline simdjson_result document::get_bool() noexcept { + return get_root_value_iterator().get_root_bool(true); +} +simdjson_inline simdjson_result document::is_null() noexcept { + return get_root_value_iterator().is_root_null(true); +} + +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document::get() & 
noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } + +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } + +template simdjson_inline error_code document::get(T &out) & noexcept { + return get().get(out); +} +template simdjson_inline error_code document::get(T &out) && noexcept { + return std::forward(*this).get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline document::operator T() noexcept(false) { return get(); } +simdjson_inline document::operator array() & noexcept(false) { return get_array(); } +simdjson_inline document::operator object() & noexcept(false) { return get_object(); } +simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } 
+simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document::operator double() noexcept(false) { return get_double(); } +simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document::operator value() noexcept(false) { return get_value(); } + +#endif +simdjson_inline simdjson_result document::count_elements() & noexcept { + auto a = get_array(); + simdjson_result answer = a.count_elements(); + /* If there was an array, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::count_fields() & noexcept { + auto a = get_object(); + simdjson_result answer = a.count_fields(); + /* If there was an object, we are now left pointing at its first element. 
*/ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::at(size_t index) & noexcept { + auto a = get_array(); + return a.at(index); +} +simdjson_inline simdjson_result document::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result document::end() & noexcept { + return {}; +} + +simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field(const char *key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline error_code document::consume() noexcept { + auto error = iter.skip_child(0); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result document::raw_json() noexcept { + auto _iter = get_root_value_iterator(); + const uint8_t * starting_point{_iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. 
+ const uint8_t * final_point{iter.unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result document::type() noexcept { + return get_root_value_iterator().type(); +} + +simdjson_inline simdjson_result document::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline simdjson_result document::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + +simdjson_inline bool document::is_negative() noexcept { + return get_root_value_iterator().is_root_negative(); +} + +simdjson_inline simdjson_result document::is_integer() noexcept { + return get_root_value_iterator().is_root_integer(true); +} + +simdjson_inline simdjson_result document::get_number_type() noexcept { + return get_root_value_iterator().get_root_number_type(true); +} + +simdjson_inline simdjson_result document::get_number() noexcept { + return get_root_value_iterator().get_root_number(true); +} + + +simdjson_inline simdjson_result document::raw_json_token() noexcept { + auto _iter = get_root_value_iterator(); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); +} + +simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_pointer.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result 
document::at_path(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_path is called + if (json_path.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + arm64::ondemand::document &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base( + error + ) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return 
first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return 
first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first); +} +template<> simdjson_inline error_code simdjson_result::get(arm64::ondemand::document &out) & noexcept = delete; +template<> simdjson_inline error_code 
simdjson_result::get(arm64::ondemand::document &out) && noexcept { + if (error()) { return error(); } + out = std::forward(first); + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} + +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} + +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} + +simdjson_inline bool simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} + +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} + +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} + +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} + + +#if SIMDJSON_EXCEPTIONS +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator arm64::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator arm64::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) 
{ + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator arm64::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator arm64::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline bool simdjson_result::at_end() const noexcept { + if (error()) { return error(); } + return first.at_end(); +} + + +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} + +} // namespace simdjson + + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} +simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} +simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } +simdjson_inline 
simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } +simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } +/** + * The document_reference instances are used primarily/solely for streams of JSON + * documents. + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. + * + * However, for streams of JSON documents, we want to be able to start from + * "321" "321" "321" + * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() + * successfully each time. + * + * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: + * this indicates that we allow trailing content. + */ +simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } +simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } +simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } +template +simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) 
noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } +simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } +simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } +simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } +simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } +simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_value(); } +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline document_reference::operator T() noexcept(false) { return get(); } +simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } +simdjson_inline 
document_reference::operator object() & noexcept(false) { return object(*doc); } +simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } +simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } +#endif +simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } +simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } +simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } +simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } +simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } +simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result 
document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } +simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } +simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } +simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } +simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } +simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } +simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } +simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } +simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } +simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } +simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} +simdjson_inline document_reference::operator document&() const noexcept { return *doc; } + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + + + +namespace simdjson { +simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::document_reference value, error_code error) + noexcept : 
implementation_simdjson_result_base(std::forward(value), error) {} + + +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return 
first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if 
(error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator arm64::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator arm64::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline 
simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator arm64::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator arm64::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H +/* end file simdjson/generic/ondemand/document-inl.h for arm64 */ +/* including simdjson/generic/ondemand/document_stream-inl.h for arm64: #include "simdjson/generic/ondemand/document_stream-inl.h" */ +/* begin file simdjson/generic/ondemand/document_stream-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ 
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. + std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); +} + +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); +} + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). 
+ if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); +} + + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } +} + +inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); +} + +#endif // SIMDJSON_THREADS_ENABLED + +simdjson_inline document_stream::document_stream( + ondemand::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size, + bool _allow_comma_separated +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? 
MINIMAL_BATCH_SIZE : _batch_size}, + allow_comma_separated{_allow_comma_separated}, + error{SUCCESS} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change + #endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif +} + +simdjson_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + allow_comma_separated{false}, + error{UNINITIALIZED} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) + #endif +{ +} + +simdjson_inline document_stream::~document_stream() noexcept +{ + #ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); + #endif +} + +inline size_t document_stream::size_in_bytes() const noexcept { + return len; +} + +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +} + +simdjson_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { +} + +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { +} + +simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { + //if(stream->error) { return stream->error; } + return simdjson_result(stream->doc, stream->error); +} + +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) 
+ // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. + // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->next() is guarded against error conditions + // (it will immediately return if stream->error casts to true). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; +} + +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; +} + +simdjson_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. 
+ return iterator(this, error == EMPTY); +} + +simdjson_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); +} + +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->allocate(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } + doc_index = batch_start; + doc = document(json_iterator(&buf[batch_start], parser)); + doc.iter._streaming = true; + + #ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread on next batch if needed + error = stage1_thread_parser.allocate(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } + #endif // SIMDJSON_THREADS_ENABLED +} + +inline void document_stream::next() noexcept { + // We always exit at once if we are already in an error condition. + if (error) { return; } + next_document(); + if (error) { return; } + auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); + doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; + + // Check if at end of structural indexes (i.e. 
at end of batch) + if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { + error = EMPTY; + // Load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + #ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } + #else + error = run_stage1(*parser, batch_start); + #endif + /** + * Whenever we move to another window, we need to update all pointers to make + * it appear as if the input buffer started at the beginning of the window. + * + * Take this input: + * + * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] + * + * Say you process the following window... + * + * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' + * + * When you do so, the json_iterator has a pointer at the beginning of the memory region + * (pointing at the beginning of '{"z"...'). + * + * When you move to the window that starts at... + * + * '[7, 10, 9] [15, 11, 12, 13] ... + * + * then it is not sufficient to just run stage 1. You also need to re-anchor the + * json_iterator so that it believes we are starting at '[7, 10, 9]...'. + * + * Under the DOM front-end, this gets done automatically because the parser owns + * the pointer to the data, and when you call stage1 and then stage2 on the same + * parser, then stage2 will run on the pointer acquired by stage1. + * + * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that + * we used. But json_iterator has no callback when stage1 is called on the parser. + * In fact, I think that the parser is unaware of json_iterator. + * + * + * So we need to re-anchor the json_iterator after each call to stage 1 so that + * all of the pointers are in sync. + */ + doc.iter = json_iterator(&buf[batch_start], parser); + doc.iter._streaming = true; + /** + * End of resync. 
+ */ + + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. + doc_index = batch_start; + } + } +} + +inline void document_stream::next_document() noexcept { + // Go to next place where depth=0 (document depth) + error = doc.iter.skip_child(0); + if (error) { return; } + // Always set depth=1 at the start of document + doc.iter._depth = 1; + // consume comma if comma separated is allowed + if (allow_comma_separated) { doc.iter.consume_character(','); } + // Resets the string buffer at the beginning, thus invalidating the strings. + doc.iter._string_buf_loc = parser->string_buf.get(); + doc.iter._root = doc.iter.position(); +} + +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; +} + +inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { + // This code only updates the structural index in the parser, it does not update any json_iterator + // instance. 
+ size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); + } +} + +simdjson_inline size_t document_stream::iterator::current_index() const noexcept { + return stream->doc_index; +} + +simdjson_inline std::string_view document_stream::iterator::source() const noexcept { + auto depth = stream->doc.iter.depth(); + auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); + + // If at root, process the first token to determine if scalar value + if (stream->doc.iter.at_root()) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': // Depth=1 already at start of document + break; + case '}': case ']': + depth--; + break; + default: // Scalar value document + // TODO: We could remove trailing whitespaces + // This returns a string spanning from start of value to the beginning of the next document (excluded) + { + auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; + // normally the length would be next_index - current_index() - 1, except for the last document + size_t svlen = next_index - current_index(); + const char *start = reinterpret_cast(stream->buf) + current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { + svlen--; + } + return std::string_view(start, svlen); + } + } + cur_struct_index++; + } + + while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': + depth++; + break; + case '}': case ']': + depth--; + break; + } + if (depth == 0) { break; } + 
cur_struct_index++; + } + + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; +} + +inline error_code document_stream::iterator::error() const noexcept { + return stream->error; +} + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(stage1_thread_parser,*parser); + error = stage1_thread_error; + if (error) { return; } + + // If there's anything left, start the stage 1 thread! + if (next_batch_start() < len) { + start_stage1_thread(); + } +} + +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. 
+ this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); + + worker->run(this, & this->stage1_thread_parser, _next_batch_start); +} + +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result::simdjson_result( + arm64::ondemand::document_stream &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} + +} + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H +/* end file simdjson/generic/ondemand/document_stream-inl.h for arm64 */ +/* including simdjson/generic/ondemand/field-inl.h for arm64: #include "simdjson/generic/ondemand/field-inl.h" */ +/* begin file simdjson/generic/ondemand/field-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +// clang 6 does not think the default constructor can be noexcept, so we make it explicit +simdjson_inline field::field() noexcept : std::pair() {} + +simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept + : std::pair(key, 
std::forward(value)) +{ +} + +simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { + raw_json_string key; + SIMDJSON_TRY( parent_iter.field_key().get(key) ); + SIMDJSON_TRY( parent_iter.field_value() ); + return field::start(parent_iter, key); +} + +simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { + return field(key, parent_iter.child()); +} + +simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); + first.consume(); + return answer; +} + +simdjson_inline raw_json_string field::key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + return first; +} + + +simdjson_inline std::string_view field::key_raw_json_token() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + return std::string_view(reinterpret_cast(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1); +} + +simdjson_inline std::string_view field::escaped_key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. 
+ auto end_quote = second.iter._json_iter->token.peek(-1); + while(*end_quote != '"') end_quote--; + return std::string_view(reinterpret_cast(first.buf), end_quote - first.buf); +} + +simdjson_inline value &field::value() & noexcept { + return second; +} + +simdjson_inline value field::value() && noexcept { + return std::forward(*this).second; +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + arm64::ondemand::field &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::key() noexcept { + if (error()) { return error(); } + return first.key(); +} + +simdjson_inline simdjson_result simdjson_result::key_raw_json_token() noexcept { + if (error()) { return error(); } + return first.key_raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::escaped_key() noexcept { + if (error()) { return error(); } + return first.escaped_key(); +} + +simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(allow_replacement); +} + +simdjson_inline simdjson_result simdjson_result::value() noexcept { + if (error()) { return error(); } + return std::move(first.value()); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +/* end file simdjson/generic/ondemand/field-inl.h for arm64 */ +/* including simdjson/generic/ondemand/json_iterator-inl.h for arm64: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/json_iterator-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE 
*/ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept + : token(std::forward(other.token)), + parser{other.parser}, + _string_buf_loc{other._string_buf_loc}, + error{other.error}, + _depth{other._depth}, + _root{other._root}, + _streaming{other._streaming} +{ + other.parser = nullptr; +} +simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { + token = other.token; + parser = other.parser; + _string_buf_loc = other._string_buf_loc; + error = other.error; + _depth = other._depth; + _root = other._root; + _streaming = other._streaming; + other.parser = nullptr; + return *this; +} + +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{false} + +{ + 
logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} + +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{streaming} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + +inline void json_iterator::rewind() noexcept { + token.set_position( root_position() ); + logger::log_headers(); // We start again + _string_buf_loc = parser->string_buf.get(); + _depth = 1; +} + +inline bool json_iterator::balanced() const noexcept { + token_iterator ti(token); + int32_t count{0}; + ti.set_position( root_position() ); + while(ti.peek() <= peek_last()) { + switch (*ti.return_current_and_advance()) + { + case '[': case '{': + count++; + break; + case ']': case '}': + count--; + break; + default: + break; + } + } + return count == 0; +} + + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and parent_depth, which is a desired effect. The warning does not show up if the +// skip_child() function is not marked inline). 
+SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { + if (depth() <= parent_depth) { return SUCCESS; } + switch (*return_current_and_advance()) { + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + + // For the first open array/object in a value, we've already incremented depth, so keep it the same + // We never stop at colon, but if we did, it wouldn't affect depth + case '[': case '{': case ':': + logger::log_start_value(*this, "skip"); + break; + // If there is a comma, we have just finished a value in an array/object, and need to get back in + case ',': + logger::log_value(*this, "skip"); + break; + // ] or } means we just finished a value and need to jump out of the array/object + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } +#if SIMDJSON_CHECK_EOF + // If there are no more tokens, the parent is incomplete. + if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + break; + case '"': + if(*peek() == ':') { + // We are at a key!!! + // This might happen if you just started an object and you skip it immediately. + // Performance note: it would be nice to get rid of this check as it is somewhat + // expensive. + // https://github.com/simdjson/simdjson/issues/1742 + logger::log_value(*this, "key"); + return_current_and_advance(); // eat up the ':' + break; // important!!! 
+ } + simdjson_fallthrough; + // Anything else must be a scalar value + default: + // For the first scalar, we will have incremented depth already, so we decrement it here. + logger::log_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + } + + // Now that we've considered the first value, we only increment/decrement for arrays/objects + while (position() < end_position()) { + switch (*return_current_and_advance()) { + case '[': case '{': + logger::log_start_value(*this, "skip"); + _depth++; + break; + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + default: + logger::log_value(*this, "skip", ""); + break; + } + } + + return report_error(TAPE_ERROR, "not enough close braces"); +} + +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool json_iterator::at_root() const noexcept { + return position() == root_position(); +} + +simdjson_inline bool json_iterator::is_single_token() const noexcept { + return parser->implementation->n_structural_indexes == 1; +} + +simdjson_inline bool json_iterator::streaming() const noexcept { + return _streaming; +} + +simdjson_inline token_position json_iterator::root_position() const noexcept { + return _root; +} + +simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +} + +simdjson_inline void json_iterator::assert_at_root() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + // Under Visual Studio, the next 
SIMDJSON_ASSUME fails with: the argument + // has side effects that will be discarded. + SIMDJSON_ASSUME( token.position() == _root ); +#endif +} + +simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { + assert_valid_position(token._position + required_tokens - 1); +} + +simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); + SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#endif +} + +simdjson_inline bool json_iterator::at_end() const noexcept { + return position() == end_position(); +} +simdjson_inline token_position json_iterator::end_position() const noexcept { + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + return &parser->implementation->structural_indexes[n_structural_indexes]; +} + +inline std::string json_iterator::to_string() const noexcept { + if( !is_alive() ) { return "dead json_iterator instance"; } + const char * current_structural = reinterpret_cast(token.peek()); + return std::string("json_iterator [ depth : ") + std::to_string(_depth) + + std::string(", structural : '") + std::string(current_structural,1) + + std::string("', offset : ") + std::to_string(token.current_offset()) + + std::string("', error : ") + error_message(error) + + std::string(" ]"); +} + +inline simdjson_result json_iterator::current_location() const noexcept { + if (!is_alive()) { // Unrecoverable error + if (!at_root()) { + return reinterpret_cast(token.peek(-1)); + } else { + return reinterpret_cast(token.peek()); + } + } + if (at_end()) { + return OUT_OF_BOUNDS; + } + return reinterpret_cast(token.peek()); +} + +simdjson_inline bool json_iterator::is_alive() const noexcept { + return parser; +} + +simdjson_inline void json_iterator::abandon() noexcept { + parser 
= nullptr; + _depth = 0; +} + +simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif // SIMDJSON_CHECK_EOF + return token.return_current_and_advance(); +} + +simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { + // deliberately done without safety guard: + return token.peek(); +} + +simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // SIMDJSON_CHECK_EOF + return token.peek(delta); +} + +simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // #if SIMDJSON_CHECK_EOF + return token.peek_length(delta); +} + +simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { + // todo: currently we require end-of-string buffering, but the following + // assert_valid_position should be turned on if/when we lift that condition. + // assert_valid_position(position); + // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. + return token.peek(position); +} + +simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_length(position); +} +simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_root_length(position); +} + +simdjson_inline token_position json_iterator::last_position() const noexcept { + // The following line fails under some compilers... 
+ // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); + // since it has side-effects. + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + SIMDJSON_ASSUME(n_structural_indexes > 0); + return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +} +simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { + return token.peek(last_position()); +} + +simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { + SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); + SIMDJSON_ASSUME(_depth == parent_depth + 1); + _depth = parent_depth; +} + +simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); + _depth = child_depth; +} + +simdjson_inline depth_t json_iterator::depth() const noexcept { + return _depth; +} + +simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { + return _string_buf_loc; +} + +simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); + logger::log_error(*this, message); + error = _error; + return error; +} + +simdjson_inline token_position json_iterator::position() const noexcept { + return token.position(); +} + +simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { + return parser->unescape(in, _string_buf_loc, allow_replacement); +} + +simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { + return parser->unescape_wobbly(in, _string_buf_loc); +} + +simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + 
SIMDJSON_ASSUME(_depth == child_depth - 1); +#if SIMDJSON_DEVELOPMENT_CHECKS +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); + SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); +#endif +#endif + token.set_position(position); + _depth = child_depth; +} + +simdjson_inline error_code json_iterator::consume_character(char c) noexcept { + if (*peek() == c) { + return_current_and_advance(); + return SUCCESS; + } + return TAPE_ERROR; +} + +#if SIMDJSON_DEVELOPMENT_CHECKS + +simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; +} + +simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } +} + +#endif + + +simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); + logger::log_error(*this, message); + return _error; +} + + +simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { + // This function is not expected to be called in performance-sensitive settings. + // Let us guard against silly cases: + if((N < max_len) || (N == 0)) { return false; } + // Copy to the buffer. + std::memcpy(tmpbuf, json, max_len); + if(N > max_len) { // We pad whatever remains with ' '. 
+ std::memset(tmpbuf + max_len, ' ', N - max_len); + } + return true; +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/json_iterator-inl.h for arm64 */ +/* including simdjson/generic/ondemand/json_type-inl.h for arm64: #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* begin file simdjson/generic/ondemand/json_type-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { + switch (type) { + case json_type::array: out << "array"; break; + case json_type::object: out << "object"; break; + case json_type::number: out << "number"; break; + case json_type::string: out << "string"; break; + case json_type::boolean: out << "boolean"; break; + case json_type::null: out << "null"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& 
operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { + return out << type.value(); +} +#endif + + + +simdjson_inline number_type number::get_number_type() const noexcept { + return type; +} + +simdjson_inline bool number::is_uint64() const noexcept { + return get_number_type() == number_type::unsigned_integer; +} + +simdjson_inline uint64_t number::get_uint64() const noexcept { + return payload.unsigned_integer; +} + +simdjson_inline number::operator uint64_t() const noexcept { + return get_uint64(); +} + +simdjson_inline bool number::is_int64() const noexcept { + return get_number_type() == number_type::signed_integer; +} + +simdjson_inline int64_t number::get_int64() const noexcept { + return payload.signed_integer; +} + +simdjson_inline number::operator int64_t() const noexcept { + return get_int64(); +} + +simdjson_inline bool number::is_double() const noexcept { + return get_number_type() == number_type::floating_point_number; +} + +simdjson_inline double number::get_double() const noexcept { + return payload.floating_point_number; +} + +simdjson_inline number::operator double() const noexcept { + return get_double(); +} + +simdjson_inline double number::as_double() const noexcept { + if(is_double()) { + return payload.floating_point_number; + } + if(is_int64()) { + return double(payload.signed_integer); + } + return double(payload.unsigned_integer); +} + +simdjson_inline void number::append_s64(int64_t value) noexcept { + payload.signed_integer = value; + type = number_type::signed_integer; +} + +simdjson_inline void number::append_u64(uint64_t value) noexcept { + payload.unsigned_integer = value; + type = number_type::unsigned_integer; +} + +simdjson_inline void number::append_double(double value) noexcept { + payload.floating_point_number = value; + type = number_type::floating_point_number; +} + +simdjson_inline void number::skip_double() noexcept { + type = number_type::floating_point_number; +} + +} // namespace ondemand +} // namespace 
arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +/* end file simdjson/generic/ondemand/json_type-inl.h for arm64 */ +/* including simdjson/generic/ondemand/logger-inl.h for arm64: #include "simdjson/generic/ondemand/logger-inl.h" */ +/* begin file simdjson/generic/ondemand/logger-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace arm64 { +namespace ondemand { +namespace logger { + +static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +static constexpr const int LOG_EVENT_LEN = 20; +static constexpr const int LOG_BUFFER_LEN = 30; +static constexpr const int LOG_SMALL_BUFFER_LEN = 10; +static int log_depth = 0; // Not threadsafe. Log only. 
+ +// Helper to turn unprintable or newline characters into spaces +static inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } +} + +template +static inline std::string string_format(const std::string& format, const Args&... args) +{ + SIMDJSON_PUSH_DISABLE_ALL_WARNINGS + int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; + auto size = static_cast(size_s); + if (size <= 0) return std::string(); + std::unique_ptr buf(new char[size]); + std::snprintf(buf.get(), size, format.c_str(), args...); + SIMDJSON_POP_DISABLE_WARNINGS + return std::string(buf.get(), buf.get() + size - 1); +} + +static inline log_level get_log_level_from_env() +{ + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + char *lvl = getenv("SIMDJSON_LOG_LEVEL"); + SIMDJSON_POP_DISABLE_WARNINGS + if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } + return log_level::info; +} + +static inline log_level log_threshold() +{ + static log_level threshold = get_log_level_from_env(); + return threshold; +} + +static inline bool should_log(log_level level) +{ + return level >= log_threshold(); +} + +inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +} + +inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "", type, detail, log_level::info); +} +inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +} + +inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, 
std::string_view detail) noexcept { + log_line(iter, index, depth, "+", type, detail, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} +inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_line(iter, "+", type, "", delta, depth_delta, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} + +inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + if (LOG_ENABLED) { log_depth--; } + log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +} + +inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); +} +inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { + log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); +} + +inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_event(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_value(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_start_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_end_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_error(iter.json_iter(), error, detail, delta, depth_delta); +} + +inline void 
log_headers() noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(log_level::info))) { + // Technically a static variable is not thread-safe, but if you are using threads and logging... well... + static bool displayed_hint{false}; + log_depth = 0; + printf("\n"); + if (!displayed_hint) { + // We only print this helpful header once. + printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); + printf("# +array says 'this is where we were when we discovered the start array'\n"); + printf( + "# -array says 'this is where we were when we ended the array'\n"); + printf("# skip says 'this is a structural or value I am skipping'\n"); + printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); + printf("#\n"); + printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); + printf("# in addition to the depth being displayed.\n"); + printf("#\n"); + printf("# Every token in the document has a single depth determined by the tokens before it,\n"); + printf("# and is not affected by what the token actually is.\n"); + printf("#\n"); + printf("# Not all structural elements are presented as tokens in the logs.\n"); + printf("#\n"); + printf("# We never give control to the user within an empty array or an empty object.\n"); + printf("#\n"); + printf("# Inside an array, having a depth greater than the array's depth means that\n"); + printf("# we are pointing inside a value.\n"); + printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); + printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); + displayed_hint = true; + } + printf("\n"); + printf("| %-*s ", LOG_EVENT_LEN, "Event"); + printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); + printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); + // printf("| %-*s ", 5, "Next#"); + printf("| %-*s ", 5, "Depth"); + printf("| Detail "); + printf("|\n"); + + printf("|%.*s", 
LOG_EVENT_LEN + 2, DASHES); + printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); + printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); + // printf("|%.*s", 5+2, DASHES); + printf("|%.*s", 5 + 2, DASHES); + printf("|--------"); + printf("|\n"); + fflush(stdout); + } + } +} + +template +inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { + log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); +} + +template +inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(level))) { + const int indent = depth * 2; + const auto buf = iter.token.buf; + auto msg = string_format(title, std::forward(args)...); + printf("| %*s%s%-*s ", indent, "", title_prefix, + LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); + { + // Print the current structural. + printf("| "); + // Before we begin, the index might point right before the document. + // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 + if (index < iter._root) { + printf("%*s", LOG_BUFFER_LEN, ""); + } else { + auto current_structural = &buf[*index]; + for (int i = 0; i < LOG_BUFFER_LEN; i++) { + printf("%c", printable_char(current_structural[i])); + } + } + printf(" "); + } + { + // Print the next structural. 
+ printf("| "); + auto next_structural = &buf[*(index + 1)]; + for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { + printf("%c", printable_char(next_structural[i])); + } + printf(" "); + } + // printf("| %5u ", *(index+1)); + printf("| %5i ", depth); + printf("| %6.*s ", int(detail.size()), detail.data()); + printf("|\n"); + fflush(stdout); + } + } +} + +} // namespace logger +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +/* end file simdjson/generic/ondemand/logger-inl.h for arm64 */ +/* including simdjson/generic/ondemand/object-inl.h for arm64: #include "simdjson/generic/ondemand/object-inl.h" */ +/* begin file simdjson/generic/ondemand/object-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + 
logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { + return find_field_unordered(key); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { + return std::forward(*this).find_field_unordered(key); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} + +simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_object().error() ); + return object(iter); +} +simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_root_object().error() ); 
+ return object(iter); +} +simdjson_inline error_code object::consume() noexcept { + if(iter.is_at_key()) { + /** + * whenever you are pointing at a key, calling skip_child() is + * unsafe because you will hit a string and you will assume that + * it is string value, and this mistake will lead you to make bad + * depth computation. + */ + /** + * We want to 'consume' the key. We could really + * just do _json_iter->return_current_and_advance(); at this + * point, but, for clarity, we will use the high-level API to + * eat the key. We assume that the compiler optimizes away + * most of the work. + */ + simdjson_unused raw_json_string actual_key; + auto error = iter.field_key().get(actual_key); + if (error) { iter.abandon(); return error; }; + // Let us move to the value while we are at it. + if ((error = iter.field_value())) { iter.abandon(); return error; } + } + auto error_skip = iter.json_iter().skip_child(iter.depth()-1); + if(error_skip) { iter.abandon(); } + return error_skip; +} + +simdjson_inline simdjson_result object::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.started_object().error() ); + return object(iter); +} + +simdjson_inline object object::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline object::object(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result object::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return object_iterator(iter); +} +simdjson_inline simdjson_result object::end() noexcept { + return object_iterator(iter); +} + +inline 
simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = find_field(unescaped); // Take note find_field does not unescape keys when matching + } else { + child = find_field(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; +} + +inline simdjson_result object::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { + return INVALID_JSON_POINTER; + } + return at_pointer(json_pointer); +} + +simdjson_inline simdjson_result object::count_fields() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. 
+ if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the object after counting the number of elements. + iter.reset_object(); + return count; +} + +simdjson_inline simdjson_result object::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_object().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +simdjson_inline simdjson_result object::reset() & noexcept { + return iter.reset_object(); +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first)[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return 
error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +inline simdjson_result simdjson_result::reset() noexcept { + if (error()) { return error(); } + return first.reset(); +} + +inline simdjson_result simdjson_result::is_empty() noexcept { + if (error()) { return error(); } + return first.is_empty(); +} + +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +/* end file simdjson/generic/ondemand/object-inl.h for arm64 */ +/* including simdjson/generic/ondemand/object_iterator-inl.h for arm64: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/object_iterator-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ 
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +// +// object_iterator +// + +simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result object_iterator::operator*() noexcept { + error_code error = iter.error(); + if (error) { iter.abandon(); return error; } + auto result = field::start(iter); + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (result.error()) { iter.abandon(); } + return result; +} +simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { + return iter.is_open(); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline object_iterator &object_iterator::operator++() noexcept { + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error + + simdjson_unused error_code error; + if ((error = iter.skip_child() )) { return *this; } + + simdjson_unused bool has_value; + if ((error = iter.has_next_field().get(has_value) )) { return *this; }; + return *this; +} +SIMDJSON_POP_DISABLE_WARNINGS + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter.depth. at_start may vary. 
Error is +// always SUCCESS: +// +// - Start: This is the state when the object is first found and the iterator is just past the {. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the , or } before the next value. In this state, +// depth == iter.depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter.depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the object iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an +// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter.depth == depth, and at_start == false. +// +// Errors that occur while reading a field to give to the user (such as when the key is not a +// string or the field is missing a colon) are yielded immediately. Depth is then decremented, +// moving to the Finished state without transitioning through an Error state at all. +// +// ## Terminal State +// +// The terminal state has iter.depth < depth. at_start is always false. 
+// +// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. +// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. +// + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + arm64::ondemand::object_iterator &&value +) noexcept + : implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +// If we're iterating and there is an error, return the error once. 
+simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +// Checks for ']' and ',' +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++first; + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/object_iterator-inl.h for arm64 */ +/* including simdjson/generic/ondemand/parser-inl.h for arm64: #include "simdjson/generic/ondemand/parser-inl.h" */ +/* begin file simdjson/generic/ondemand/parser-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +simdjson_inline 
parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity} { +} + +simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + if (new_capacity > max_capacity()) { return CAPACITY; } + if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + + // string_capacity copied from document::allocate + _capacity = 0; + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); +#if SIMDJSON_DEVELOPMENT_CHECKS + start_positions.reset(new (std::nothrow) token_position[new_max_depth]); +#endif + if (implementation) { + SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); + SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + } else { + SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); + } + _capacity = new_capacity; + _max_depth = new_max_depth; + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. 
+ SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return document::start({ reinterpret_cast(json.data()), this }); +} + +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; + } + return document::start({ reinterpret_cast(json.data()), this, true }); +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { + return iterate(padded_string_view(json, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { + if(json.capacity() - json.size() < SIMDJSON_PADDING) { + json.reserve(json.size() + SIMDJSON_PADDING); + } + return iterate(padded_string_view(json)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { + return iterate(padded_string_view(json)); +} + +simdjson_warn_unused simdjson_inline simdjson_result 
parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + padded_string_view json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + const padded_string &json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length()) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return json_iterator(reinterpret_cast(json.data()), this); +} + +inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { + buf += 3; + len -= 3; + } + if(allow_comma_separated && batch_size < len) { batch_size = len; } + return document_stream(*this, buf, len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return 
iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} + +simdjson_inline size_t parser::capacity() const noexcept { + return _capacity; +} +simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_inline size_t parser::max_depth() const noexcept { + return _max_depth; +} + +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; + } +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { + uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { + uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H +/* end file simdjson/generic/ondemand/parser-inl.h for arm64 */ +/* 
including simdjson/generic/ondemand/raw_json_string-inl.h for arm64: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +namespace arm64 { +namespace ondemand { + +simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} + +simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } + + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;pos < target.size() && target[pos] != '\\';pos++) {} + // slow path may begin. + bool escaping{false}; + for(;pos < target.size();pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;target[pos] && target[pos] != '\\';pos++) {} + // slow path may begin. 
+ bool escaping{false}; + for(;target[pos];pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { + // If we are going to call memcmp, then we must know something about the length of the raw_json_string. + return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); +} + +simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + if(target.size() <= SIMDJSON_PADDING) { + return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + } + const char * r{raw()}; + size_t pos{0}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. 
+ return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { + // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. 
+ return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { + return a.unsafe_is_equal(c); +} + +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { + return a == c; +} + +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { + return !(a == c); +} + +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { + return !(a == c); +} + + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { + return iter.unescape(*this, allow_replacement); +} + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { + return iter.unescape_wobbly(*this); +} + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { + bool in_escape = false; + const char *s = str.raw(); + while (true) { + switch (*s) { + case '\\': in_escape = !in_escape; break; + case '"': if (in_escape) { in_escape = false; } else { return out; } break; + default: if (in_escape) { in_escape = false; } + } + out << *s; + s++; + } +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { + if (error()) { return error(); } + return 
first.raw(); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(arm64::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { + if (error()) { return error(); } + return first.unescape(iter, allow_replacement); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(arm64::ondemand::json_iterator &iter) const noexcept { + if (error()) { return error(); } + return first.unescape_wobbly(iter); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H +/* end file simdjson/generic/ondemand/raw_json_string-inl.h for arm64 */ +/* including simdjson/generic/ondemand/serialization-inl.h for arm64: #include "simdjson/generic/ondemand/serialization-inl.h" */ +/* begin file simdjson/generic/ondemand/serialization-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +inline std::string_view trim(const std::string_view str) noexcept { + // We can almost surely do better by rolling our own find_first_not_of function. 
+ size_t first = str.find_first_not_of(" \t\n\r"); + // If we have the empty string (just white space), then no trimming is possible, and + // we return the empty string_view. + if (std::string_view::npos == first) { return std::string_view(); } + size_t last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, (last - first + 1)); +} + + +inline simdjson_result to_json_string(arm64::ondemand::document& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(arm64::ondemand::document_reference& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(arm64::ondemand::value& x) noexcept { + /** + * If we somehow receive a value that has already been consumed, + * then the following code could be in trouble. E.g., we create + * an array as needed, but if an array was already created, then + * it could be bad. 
+ */ + using namespace arm64::ondemand; + arm64::ondemand::json_type t; + auto error = x.type().get(t); + if(error != SUCCESS) { return error; } + switch (t) + { + case json_type::array: + { + arm64::ondemand::array array; + error = x.get_array().get(array); + if(error) { return error; } + return to_json_string(array); + } + case json_type::object: + { + arm64::ondemand::object object; + error = x.get_object().get(object); + if(error) { return error; } + return to_json_string(object); + } + default: + return trim(x.raw_json_token()); + } +} + +inline simdjson_result to_json_string(arm64::ondemand::object& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(arm64::ondemand::array& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} +} // namespace simdjson + +namespace simdjson { namespace arm64 { namespace ondemand { + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else 
{ + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::document_reference& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if 
(x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif +}}} // namespace simdjson::arm64::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +/* end file simdjson/generic/ondemand/serialization-inl.h for arm64 */ +/* including simdjson/generic/ondemand/token_iterator-inl.h for arm64: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/token_iterator-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): 
#include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +simdjson_inline token_iterator::token_iterator( + const uint8_t *_buf, + token_position position +) noexcept : buf{_buf}, _position{position} +{ +} + +simdjson_inline uint32_t token_iterator::current_offset() const noexcept { + return *(_position); +} + + +simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { + return &buf[*(_position++)]; +} + +simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; +} +simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; +} +simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { + return *(position+1) - *position; +} + +simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { + return *(position+2) - *(position) > *(position+1) - *(position) ? 
+ *(position+1) - *(position) + : *(position+2) - *(position); +} +simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(_position+delta)]; +} +simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(_position+delta); +} +simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + return *(_position+delta+1) - *(_position+delta); +} + +simdjson_inline token_position token_iterator::position() const noexcept { + return _position; +} +simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; +} + +simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return _position == other._position; +} +simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return _position != other._position; +} +simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return _position > other._position; +} +simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return _position >= other._position; +} +simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return _position < other._position; +} +simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return _position <= other._position; +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H +/* end file 
simdjson/generic/ondemand/token_iterator-inl.h for arm64 */ +/* including simdjson/generic/ondemand/value-inl.h for arm64: #include "simdjson/generic/ondemand/value-inl.h" */ +/* begin file simdjson/generic/ondemand/value-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +simdjson_inline value::value(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} +simdjson_inline value value::start(const value_iterator &iter) noexcept { + return iter; +} +simdjson_inline value value::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline simdjson_result value::get_array() noexcept { + return array::start(iter); +} +simdjson_inline simdjson_result 
value::get_object() noexcept { + return object::start(iter); +} +simdjson_inline simdjson_result value::start_or_resume_object() noexcept { + if (iter.at_start()) { + return get_object(); + } else { + return object::resume(iter); + } +} + +simdjson_inline simdjson_result value::get_raw_json_string() noexcept { + return iter.get_raw_json_string(); +} +simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { + return iter.get_string(allow_replacement); +} +template +simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { + return iter.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result value::get_wobbly_string() noexcept { + return iter.get_wobbly_string(); +} +simdjson_inline simdjson_result value::get_double() noexcept { + return iter.get_double(); +} +simdjson_inline simdjson_result value::get_double_in_string() noexcept { + return iter.get_double_in_string(); +} +simdjson_inline simdjson_result value::get_uint64() noexcept { + return iter.get_uint64(); +} +simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { + return iter.get_uint64_in_string(); +} +simdjson_inline simdjson_result value::get_int64() noexcept { + return iter.get_int64(); +} +simdjson_inline simdjson_result value::get_int64_in_string() noexcept { + return iter.get_int64_in_string(); +} +simdjson_inline simdjson_result value::get_bool() noexcept { + return iter.get_bool(); +} +simdjson_inline simdjson_result value::is_null() noexcept { + return iter.is_null(); +} +template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result 
value::get() noexcept { return get_number(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } + +template simdjson_inline error_code value::get(T &out) noexcept { + return get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline value::operator T() noexcept(false) { + return get(); +} +simdjson_inline value::operator array() noexcept(false) { + return get_array(); +} +simdjson_inline value::operator object() noexcept(false) { + return get_object(); +} +simdjson_inline value::operator uint64_t() noexcept(false) { + return get_uint64(); +} +simdjson_inline value::operator int64_t() noexcept(false) { + return get_int64(); +} +simdjson_inline value::operator double() noexcept(false) { + return get_double(); +} +simdjson_inline value::operator std::string_view() noexcept(false) { + return get_string(false); +} +simdjson_inline value::operator raw_json_string() noexcept(false) { + return get_raw_json_string(); +} +simdjson_inline value::operator bool() noexcept(false) { + return get_bool(); +} +#endif + +simdjson_inline simdjson_result value::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result value::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result value::count_elements() & noexcept { + simdjson_result answer; + auto a = get_array(); + answer = a.count_elements(); + // count_elements leaves you pointing inside the array, at the first element. + // We need to move back so that the user can create a new array (which requires that + // we point at '['). 
+ iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::count_fields() & noexcept { + simdjson_result answer; + auto a = get_object(); + answer = a.count_fields(); + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::at(size_t index) noexcept { + auto a = get_array(); + return a.at(index); +} + +simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result value::find_field(const char *key) noexcept { + return start_or_resume_object().find_field(key); +} + +simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} + +simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result value::operator[](const char *key) noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline simdjson_result value::type() noexcept { + return iter.type(); +} + +simdjson_inline simdjson_result value::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! 
((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline simdjson_result value::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + + +simdjson_inline bool value::is_negative() noexcept { + return iter.is_negative(); +} + +simdjson_inline simdjson_result value::is_integer() noexcept { + return iter.is_integer(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { + return iter.get_number_type(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { + return iter.get_number(); +} + +simdjson_inline std::string_view value::raw_json_token() noexcept { + return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); +} + +simdjson_inline simdjson_result value::raw_json() noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: { + ondemand::array array; + SIMDJSON_TRY(get_array().get(array)); + return array.raw_json(); + } + case json_type::object: { + ondemand::object object; + SIMDJSON_TRY(get_object().get(object)); + return object.raw_json(); + } + default: + return raw_json_token(); + } +} + +simdjson_inline simdjson_result value::current_location() noexcept { + return iter.json_iter().current_location(); +} + +simdjson_inline int32_t value::current_depth() const noexcept{ + return iter.json_iter().depth(); +} + +inline bool is_pointer_well_formed(std::string_view json_pointer) noexcept { + if (simdjson_unlikely(json_pointer.empty())) { // can't be + return false; + } + if (simdjson_unlikely(json_pointer[0] != '/')) { + return false; + } + size_t escape = json_pointer.find('~'); + if (escape == std::string_view::npos) { + return true; + } + if (escape == json_pointer.size() - 1) { + return false; + } + if (json_pointer[escape + 1] != '0' && json_pointer[escape + 1] != '1') { + 
return false; + } + return true; +} + +simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + // a non-empty string can be invalid, or accessing a primitive (issue 2154) + if (is_pointer_well_formed(json_pointer)) { + return NO_SUCH_FIELD; + } + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result value::at_path(std::string_view json_path) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + arm64::ondemand::value &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + if (error()) { return error(); } + return {}; 
+} + +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} + +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { + if (error()) { return error(); } + return first[key]; +} + +simdjson_inline simdjson_result simdjson_result::get_array() noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return 
first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return first.get(); +} +template simdjson_inline error_code simdjson_result::get(T &out) noexcept { + if (error()) { return error(); } + return first.get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return std::move(first); +} +template<> simdjson_inline error_code simdjson_result::get(arm64::ondemand::value &out) noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); 
} + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return static_cast(first); +} +simdjson_inline simdjson_result::operator arm64::ondemand::array() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator arm64::ondemand::object() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator arm64::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline 
simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer( + std::string_view json_pointer) noexcept { + if (error()) { + return error(); + } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H +/* end file simdjson/generic/ondemand/value-inl.h for arm64 */ +/* including simdjson/generic/ondemand/value_iterator-inl.h for arm64: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/value_iterator-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +simdjson_inline value_iterator::value_iterator( + json_iterator *json_iter, + depth_t depth, + token_position start_position +) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} +{ +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_root_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { + assert_at_container_start(); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + if (*_json_iter->peek() == '}') { + logger::log_value(*_json_iter, "empty object"); + _json_iter->return_current_and_advance(); + end_container(); + return false; + } + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! 
_json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should + // call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != '}') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); + } + // If the last character is } *and* the first gibberish character is also '}' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed object. 
+ return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { + auto error = check_root_object(); + if(error) { return error; } + return started_object(); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { +#if SIMDJSON_CHECK_EOF + if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } + // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + _json_iter->ascend_to(depth()-1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { + assert_at_next(); + + // It's illegal to call this unless there are more tokens: anything that ends in } or ] is + // obligated to verify there are more tokens if they are not the top level. + switch (*_json_iter->return_current_and_advance()) { + case '}': + logger::log_end_value(*_json_iter, "object"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between object fields"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { + error_code error; + bool has_value; + // + // Initially, the object can be in one of a few different places: + // + // 1. The start of the object, at the first field: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + if (at_first_field()) { + has_value = true; + + // + // 2. 
When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + return false; + + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + if ((error = skip_child() )) { abandon(); return error; } + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + while (has_value) { + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. 
+ if ((error = field_value() )) { abandon(); return error; } + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + //if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); // Skip the value entirely + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + + // If the loop ended, we're out of fields to look at. 
+ return false; +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { + /** + * When find_field_unordered_raw is called, we can either be pointing at the + * first key, pointing outside (at the closing brace) or if a key was matched + * we can be either pointing right afterthe ':' right before the value (that we need skip), + * or we may have consumed the value and we might be at a comma or at the + * final brace (ready for a call to has_next_field()). + */ + error_code error; + bool has_value; + + // First, we scan from that point to the end. + // If we don't find a match, we may loop back around, and scan from the beginning to that point. + token_position search_start = _json_iter->position(); + + // We want to know whether we need to go back to the beginning. + bool at_first = at_first_field(); + /////////////// + // Initially, the object can be in one of a few different places: + // + // 1. At the first key: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + // + if (at_first) { + has_value = true; + + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { + +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + SIMDJSON_TRY(reset_object().get(has_value)); + at_first = true; + // 3. 
When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + // If someone queried a key but they did not access the value, then we are left pointing + // at the ':' and we need to move forward through the value... If the value was + // processed then skip_child() does not move the iterator (but may adjust the depth). + if ((error = skip_child() )) { abandon(); return error; } + search_start = _json_iter->position(); + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + + // After initial processing, we will be in one of two states: + // + // ``` + // // At the beginning of a field + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // At the end of the object + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // ``` + // + // Next, we find a match starting from the current position. + while (has_value) { + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key().
+ if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // The has_next_field() advances the pointer and checks that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + // Performance note: it may be wasteful to rewind to the beginning when there might be + // no other query following. Indeed, it would require reskipping the whole object. + // Instead, you can just stay where you are. If there is a new query, there is always time + // to rewind. + if(at_first) { return false; } + + // If we reach the end without finding a match, search the rest of the fields starting at the + // beginning of the object.
+ // (We have already run through the object before, so we've already validated its structure. We + // don't check errors in this bit.) + SIMDJSON_TRY(reset_object().get(has_value)); + while (true) { + SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + error = field_value(); SIMDJSON_ASSUME(!error); + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). 
+ SIMDJSON_TRY( skip_child() ); + // If we reached the end of the key-value pair we started from, then we know + // that the key is not there so we return false. We are either right before + // the next comma or the final brace. + if(_json_iter->position() == search_start) { return false; } + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); + // If we make the mistake of exiting here, then we could be left pointing at a key + // in the middle of an object. That's not an allowable state. + } + // If the loop ended, we're out of fields to look at. The program should + // never reach this point. + return false; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { + assert_at_next(); + + const uint8_t *key = _json_iter->return_current_and_advance(); + if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } + return raw_json_string(key); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { + assert_at_next(); + + if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } + _json_iter->descend_to(depth()+1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_root_array(); +} + +inline std::string value_iterator::to_string() const noexcept { + auto answer = 
std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); + if(_json_iter != nullptr) { answer += _json_iter->to_string(); } + answer += std::string(" ]"); + return answer; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { + assert_at_container_start(); + if (*_json_iter->peek() == ']') { + logger::log_value(*_json_iter, "empty array"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY( end_container() ); + return false; + } + _json_iter->descend_to(depth()+1); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should + // also call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != ']') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); + } + // If the last character is ] *and* the first gibberish character is also ']' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. 
+ if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed array. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { + auto error = check_root_array(); + if (error) { return error; } + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { + assert_at_next(); + + logger::log_event(*this, "has_next_element"); + switch (*_json_iter->return_current_and_advance()) { + case ']': + logger::log_end_value(*_json_iter, "array"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + _json_iter->descend_to(depth()+1); + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between array elements"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { + auto not_true = atomparsing::str4ncmp(json, "true"); + auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); + bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 
5 : 4]); + if (error) { return incorrect_type_error("Not a boolean"); } + return simdjson_result(!not_true); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { + bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); + // if we start with 'n', we must be a null + if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } + return is_null_string; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { + return get_raw_json_string().unescape(json_iter(), allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_string(allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { + return get_raw_json_string().unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { + auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { + auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return 
result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { + auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { + auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { + auto result = numberparsing::parse_double(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { + auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { + auto result = parse_bool(peek_non_root_scalar("bool")); + if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } + return result; +} +simdjson_inline simdjson_result value_iterator::is_null() noexcept { + bool is_null_value; + SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); + if(is_null_value) { advance_non_root_scalar("null"); } + return is_null_value; +} +simdjson_inline bool value_iterator::is_negative() noexcept { + return numberparsing::is_negative(peek_non_root_scalar("numbersign")); +} +simdjson_inline bool value_iterator::is_root_negative() noexcept { + return numberparsing::is_negative(peek_root_scalar("numbersign")); +} +simdjson_inline simdjson_result value_iterator::is_integer() noexcept { + return 
numberparsing::is_integer(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { + return numberparsing::get_number_type(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number() noexcept { + number num; + error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); + if(error) { return error; } + return num; +} + +simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("is_root_integer"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + return false; // if there are more than 20 characters, it cannot be represented as an integer. + } + auto answer = numberparsing::is_integer(tmpbuf); + // If the parsing was a success, we must still check that it is + // a single scalar. Note that we parse first because of cases like '[]' where + // getting TRAILING_CONTENT is wrong. + if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } + return answer; +} + +simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return number_type::big_integer; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; + } + auto answer = numberparsing::get_number_type(tmpbuf); + if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + return answer; +} +simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return BIGINT_ERROR; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; + } + number num; + error_code error = numberparsing::parse_number(tmpbuf, num); + if(error) { return error; } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("number"); + return num; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { + return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_root_string(check_trailing, allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { + return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) 
noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("bool"); + uint8_t tmpbuf[5+1+1]; // +1 for null termination + tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } + auto result = parse_bool(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + } + return result; +} +simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("null"); + bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + if(result) { // we have something that looks like a null. + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("null"); + } + return result; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); + + return _json_iter->skip_child(depth()); +} + +simdjson_inline value_iterator value_iterator::child() const noexcept { + assert_at_child(); + return { _json_iter, depth()+1, _json_iter->token.position() }; +} + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is +// marked non-inline. 
+SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline bool value_iterator::is_open() const noexcept { + return _json_iter->depth() >= depth(); +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool value_iterator::at_end() const noexcept { + return _json_iter->at_end(); +} + +simdjson_inline bool value_iterator::at_start() const noexcept { + return _json_iter->token.position() == start_position(); +} + +simdjson_inline bool value_iterator::at_first_field() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + return _json_iter->token.position() == start_position() + 1; +} + +simdjson_inline void value_iterator::abandon() noexcept { + _json_iter->abandon(); +} + +simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { + return _depth; +} +simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { + return _json_iter->error; +} +simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { + return _json_iter->string_buf_loc(); +} +simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { + return *_json_iter; +} +simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { + return *_json_iter; +} + +simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { + return _json_iter->peek(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { + return _json_iter->peek_length(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { + return _json_iter->peek_root_length(start_position()); +} + +simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. 
+ if (!is_at_start()) { return peek_start(); } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return; } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { + logger::log_start_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + const uint8_t *json; + if (!is_at_start()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + json = peek_start(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + } else { + assert_at_start(); + /** + * We should be prudent. Let us peek. If it is not the right type, we + * return an error. Only once we have determined that we have the right + * type are we allowed to advance! 
+ */ + json = _json_iter->peek(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + _json_iter->return_current_and_advance(); + } + + + return SUCCESS; +} + + +simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_root(); + return _json_iter->peek(); +} +simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_non_root_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_root(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} +simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_non_root_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { + logger::log_error(*_json_iter, start_position(), depth(), message); + return INCORRECT_TYPE; +} + +simdjson_inline bool value_iterator::is_at_start() const noexcept { + return position() == start_position(); +} + +simdjson_inline bool value_iterator::is_at_key() const noexcept { + // Keys are at the same depth as the object. + // Note here that we could be safer and check that we are within an object, + // but we do not. 
+  return _depth == _json_iter->_depth && *_json_iter->peek() == '"';
+}
+
+simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept {
+  // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]).
+  auto delta = position() - start_position();
+  return delta == 1 || delta == 2;
+}
+
+inline void value_iterator::assert_at_start() const noexcept {
+  SIMDJSON_ASSUME( _json_iter->token._position == _start_position );
+  SIMDJSON_ASSUME( _json_iter->_depth == _depth );
+  SIMDJSON_ASSUME( _depth > 0 );
+}
+
+inline void value_iterator::assert_at_container_start() const noexcept {
+  SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 );
+  SIMDJSON_ASSUME( _json_iter->_depth == _depth );
+  SIMDJSON_ASSUME( _depth > 0 );
+}
+
+inline void value_iterator::assert_at_next() const noexcept {
+  SIMDJSON_ASSUME( _json_iter->token._position > _start_position );
+  SIMDJSON_ASSUME( _json_iter->_depth == _depth );
+  SIMDJSON_ASSUME( _depth > 0 );
+}
+
+simdjson_inline void value_iterator::move_at_start() noexcept {
+  _json_iter->_depth = _depth;
+  _json_iter->token.set_position(_start_position);
+}
+
+simdjson_inline void value_iterator::move_at_container_start() noexcept {
+  _json_iter->_depth = _depth;
+  _json_iter->token.set_position(_start_position + 1);
+}
+
+simdjson_inline simdjson_result<bool> value_iterator::reset_array() noexcept {
+  if(error()) { return error(); }
+  move_at_container_start();
+  return started_array();
+}
+
+simdjson_inline simdjson_result<bool> value_iterator::reset_object() noexcept {
+  if(error()) { return error(); }
+  move_at_container_start();
+  return started_object();
+}
+
+inline void value_iterator::assert_at_child() const noexcept {
+  SIMDJSON_ASSUME( _json_iter->token._position > _start_position );
+  SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 );
+  SIMDJSON_ASSUME( _depth > 0 );
+}
+
+inline void value_iterator::assert_at_root() const noexcept {
+  assert_at_start();
+  SIMDJSON_ASSUME( _depth == 1 );
+}
+
+inline void value_iterator::assert_at_non_root_start() const noexcept {
+  assert_at_start();
+  SIMDJSON_ASSUME( _depth > 1 );
+}
+
+inline void value_iterator::assert_is_valid() const noexcept {
+  SIMDJSON_ASSUME( _json_iter != nullptr );
+}
+
+simdjson_inline bool value_iterator::is_valid() const noexcept {
+  return _json_iter != nullptr;
+}
+
+simdjson_inline simdjson_result<json_type> value_iterator::type() const noexcept {
+  switch (*peek_start()) {
+    case '{':
+      return json_type::object;
+    case '[':
+      return json_type::array;
+    case '"':
+      return json_type::string;
+    case 'n':
+      return json_type::null;
+    case 't': case 'f':
+      return json_type::boolean;
+    case '-':
+    case '0': case '1': case '2': case '3': case '4':
+    case '5': case '6': case '7': case '8': case '9':
+      return json_type::number;
+    default:
+      return TAPE_ERROR;
+  }
+}
+
+simdjson_inline token_position value_iterator::start_position() const noexcept {
+  return _start_position;
+}
+
+simdjson_inline token_position value_iterator::position() const noexcept {
+  return _json_iter->position();
+}
+
+simdjson_inline token_position value_iterator::end_position() const noexcept {
+  return _json_iter->end_position();
+}
+
+simdjson_inline token_position value_iterator::last_position() const noexcept {
+  return _json_iter->last_position();
+}
+
+simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept {
+  return _json_iter->report_error(error, message);
+}
+
+} // namespace ondemand
+} // namespace arm64
+} // namespace simdjson
+
+namespace simdjson {
+
+simdjson_inline simdjson_result<arm64::ondemand::value_iterator>::simdjson_result(arm64::ondemand::value_iterator &&value) noexcept
+    : implementation_simdjson_result_base<arm64::ondemand::value_iterator>(std::forward<arm64::ondemand::value_iterator>(value)) {}
+simdjson_inline simdjson_result<arm64::ondemand::value_iterator>::simdjson_result(error_code error) noexcept
+    : implementation_simdjson_result_base<arm64::ondemand::value_iterator>(error) {}
+
+} // namespace simdjson
+
+#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H
+/* end file simdjson/generic/ondemand/value_iterator-inl.h for arm64 */
+/* end file simdjson/generic/ondemand/amalgamated.h for arm64 */
+/* including simdjson/arm64/end.h: #include "simdjson/arm64/end.h" */
+/* begin file simdjson/arm64/end.h */
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT
+/* undefining SIMDJSON_IMPLEMENTATION from "arm64" */
+#undef SIMDJSON_IMPLEMENTATION
+/* end file simdjson/arm64/end.h */
+
+#endif // SIMDJSON_ARM64_ONDEMAND_H
+/* end file simdjson/arm64/ondemand.h */
+#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(fallback)
+/* including simdjson/fallback/ondemand.h: #include "simdjson/fallback/ondemand.h" */
+/* begin file simdjson/fallback/ondemand.h */
+#ifndef SIMDJSON_FALLBACK_ONDEMAND_H
+#define SIMDJSON_FALLBACK_ONDEMAND_H
+
+/* including simdjson/fallback/begin.h: #include "simdjson/fallback/begin.h" */
+/* begin file simdjson/fallback/begin.h */
+/* defining SIMDJSON_IMPLEMENTATION to "fallback" */
+#define SIMDJSON_IMPLEMENTATION fallback
+/* including simdjson/fallback/base.h: #include "simdjson/fallback/base.h" */
+/* begin file simdjson/fallback/base.h */
+#ifndef SIMDJSON_FALLBACK_BASE_H
+#define SIMDJSON_FALLBACK_BASE_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #include "simdjson/base.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+/**
+ * Fallback implementation (runs on any machine).
+ */
+namespace fallback {
+
+class implementation;
+
+} // namespace fallback
+} // namespace simdjson
+
+#endif // SIMDJSON_FALLBACK_BASE_H
+/* end file simdjson/fallback/base.h */
+/* including simdjson/fallback/bitmanipulation.h: #include "simdjson/fallback/bitmanipulation.h" */
+/* begin file simdjson/fallback/bitmanipulation.h */
+#ifndef SIMDJSON_FALLBACK_BITMANIPULATION_H
+#define SIMDJSON_FALLBACK_BITMANIPULATION_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace fallback {
+namespace {
+
+#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64)
+static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) {
+  unsigned long x0 = (unsigned long)x, top, bottom;
+  _BitScanForward(&top, (unsigned long)(x >> 32));
+  _BitScanForward(&bottom, x0);
+  *ret = x0 ? bottom : 32 + top;
+  return x != 0;
+}
+static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) {
+  unsigned long x1 = (unsigned long)(x >> 32), top, bottom;
+  _BitScanReverse(&top, x1);
+  _BitScanReverse(&bottom, (unsigned long)x);
+  *ret = x1 ? top + 32 : bottom;
+  return x != 0;
+}
+#endif
+
+/* result might be undefined when input_num is zero */
+simdjson_inline int leading_zeroes(uint64_t input_num) {
+#ifdef _MSC_VER
+  unsigned long leading_zero = 0;
+  // Search the mask data from most significant bit (MSB)
+  // to least significant bit (LSB) for a set bit (1).
+  if (_BitScanReverse64(&leading_zero, input_num))
+    return (int)(63 - leading_zero);
+  else
+    return 64;
+#else
+  return __builtin_clzll(input_num);
+#endif// _MSC_VER
+}
+
+} // unnamed namespace
+} // namespace fallback
+} // namespace simdjson
+
+#endif // SIMDJSON_FALLBACK_BITMANIPULATION_H
+/* end file simdjson/fallback/bitmanipulation.h */
+/* including simdjson/fallback/stringparsing_defs.h: #include "simdjson/fallback/stringparsing_defs.h" */
+/* begin file simdjson/fallback/stringparsing_defs.h */
+#ifndef SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H
+#define SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace fallback {
+namespace {
+
+// Holds backslashes and quotes locations.
+struct backslash_and_quote {
+public:
+  static constexpr uint32_t BYTES_PROCESSED = 1;
+  simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst);
+
+  simdjson_inline bool has_quote_first() { return c == '"'; }
+  simdjson_inline bool has_backslash() { return c == '\\'; }
+  simdjson_inline int quote_index() { return c == '"' ? 0 : 1; }
+  simdjson_inline int backslash_index() { return c == '\\' ? 0 : 1; }
+
+  uint8_t c;
+}; // struct backslash_and_quote
+
+simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) {
+  // store to dest unconditionally - we can overwrite the bits we don't like later
+  dst[0] = src[0];
+  return { src[0] };
+}
+
+} // unnamed namespace
+} // namespace fallback
+} // namespace simdjson
+
+#endif // SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H
+/* end file simdjson/fallback/stringparsing_defs.h */
+/* including simdjson/fallback/numberparsing_defs.h: #include "simdjson/fallback/numberparsing_defs.h" */
+/* begin file simdjson/fallback/numberparsing_defs.h */
+#ifndef SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H
+#define SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+#include <cstring>
+
+#ifdef JSON_TEST_NUMBERS // for unit testing
+void found_invalid_number(const uint8_t *buf);
+void found_integer(int64_t result, const uint8_t *buf);
+void found_unsigned_integer(uint64_t result, const uint8_t *buf);
+void found_float(double result, const uint8_t *buf);
+#endif
+
+namespace simdjson {
+namespace fallback {
+namespace numberparsing {
+
+// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
+/** @private */
+static simdjson_inline uint32_t parse_eight_digits_unrolled(const char *chars) {
+  uint64_t val;
+  memcpy(&val, chars, sizeof(uint64_t));
+  val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8;
+  val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
+  return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32);
+}
+
+/** @private */
+static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) {
+  return parse_eight_digits_unrolled(reinterpret_cast<const char *>(chars));
+}
+
+#if SIMDJSON_IS_32BITS // _umul128 for x86, arm
+// this is a slow emulation routine for 32-bit
+//
+static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) {
+  return x * (uint64_t)y;
+}
+static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) {
+  uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd);
+  uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd);
+  uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32));
+  uint64_t adbc_carry = !!(adbc < ad);
+  uint64_t lo = bd + (adbc << 32);
+  *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) +
+        (adbc_carry << 32) + !!(lo < bd);
+  return lo;
+}
+#endif
+
+/** @private */
+simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) {
+  internal::value128 answer;
+#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS
+#if SIMDJSON_IS_ARM64
+  // ARM64 has native support for 64-bit multiplications, no need to emulate
+  answer.high = __umulh(value1, value2);
+  answer.low = value1 * value2;
+#else
+  answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64
+#endif // SIMDJSON_IS_ARM64
+#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS
+  __uint128_t r = (static_cast<__uint128_t>(value1)) * value2;
+  answer.low = uint64_t(r);
+  answer.high = uint64_t(r >> 64);
+#endif
+  return answer;
+}
+
+} // namespace numberparsing
+} // namespace fallback
+} // namespace simdjson
+
+#define SIMDJSON_SWAR_NUMBER_PARSING 1
+
+#endif // SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H
+/* end file simdjson/fallback/numberparsing_defs.h */
+/* end file simdjson/fallback/begin.h */
+/* including simdjson/generic/ondemand/amalgamated.h for fallback: #include "simdjson/generic/ondemand/amalgamated.h" */
+/* begin file simdjson/generic/ondemand/amalgamated.h for fallback */
+#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H)
+#error simdjson/generic/ondemand/dependencies.h must be included before simdjson/generic/ondemand/amalgamated.h!
+#endif
+
+// Stuff other things depend on
+/* including simdjson/generic/ondemand/base.h for fallback: #include "simdjson/generic/ondemand/base.h" */
+/* begin file simdjson/generic/ondemand/base.h for fallback */
+#ifndef SIMDJSON_GENERIC_ONDEMAND_BASE_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_BASE_H */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace fallback {
+/**
+ * A fast, simple, DOM-like interface that parses JSON as you use it.
+ *
+ * Designed for maximum speed and a lower memory profile.
+ */
+namespace ondemand {
+
+/** Represents the depth of a JSON value (number of nested arrays/objects). */
+using depth_t = int32_t;
+
+/** @copydoc simdjson::fallback::number_type */
+using number_type = simdjson::fallback::number_type;
+
+/** @private Position in the JSON buffer indexes */
+using token_position = const uint32_t *;
+
+class array;
+class array_iterator;
+class document;
+class document_reference;
+class document_stream;
+class field;
+class json_iterator;
+enum class json_type;
+struct number;
+class object;
+class object_iterator;
+class parser;
+class raw_json_string;
+class token_iterator;
+class value;
+class value_iterator;
+
+} // namespace ondemand
+} // namespace fallback
+} // namespace simdjson
+
+#endif // SIMDJSON_GENERIC_ONDEMAND_BASE_H
+/* end file simdjson/generic/ondemand/base.h for fallback */
+/* including simdjson/generic/ondemand/value_iterator.h for fallback: #include "simdjson/generic/ondemand/value_iterator.h" */
+/* begin file simdjson/generic/ondemand/value_iterator.h for fallback */
+#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace fallback {
+namespace ondemand {
+
+/**
+ * Iterates through a single JSON value at a particular depth.
+ *
+ * Does not keep track of the type of value: provides methods for objects, arrays and scalars and expects
+ * the caller to call the right ones.
+ *
+ * @private This is not intended for external use.
+ */
+class value_iterator {
+protected:
+  /** The underlying JSON iterator */
+  json_iterator *_json_iter{};
+  /** The depth of this value */
+  depth_t _depth{};
+  /**
+   * The starting token index for this value
+   */
+  token_position _start_position{};
+
+public:
+  simdjson_inline value_iterator() noexcept = default;
+
+  /**
+   * Denote that we're starting a document.
+   */
+  simdjson_inline void start_document() noexcept;
+
+  /**
+   * Skips a non-iterated or partially-iterated JSON value, whether it is a scalar, array or object.
+   *
+   * Optimized for scalars.
+   */
+  simdjson_warn_unused simdjson_inline error_code skip_child() noexcept;
+
+  /**
+   * Tell whether the iterator is at the EOF mark
+   */
+  simdjson_inline bool at_end() const noexcept;
+
+  /**
+   * Tell whether the iterator is at the start of the value
+   */
+  simdjson_inline bool at_start() const noexcept;
+
+  /**
+   * Tell whether the value is open--if the value has not been used, or the array/object is still open.
+   */
+  simdjson_inline bool is_open() const noexcept;
+
+  /**
+   * Tell whether the value is at an object's first field (just after the {).
+   */
+  simdjson_inline bool at_first_field() const noexcept;
+
+  /**
+   * Abandon all iteration.
+   */
+  simdjson_inline void abandon() noexcept;
+
+  /**
+   * Get the child value as a value_iterator.
+   */
+  simdjson_inline value_iterator child_value() const noexcept;
+
+  /**
+   * Get the depth of this value.
+   */
+  simdjson_inline int32_t depth() const noexcept;
+
+  /**
+   * Get the JSON type of this value.
+   *
+   * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse".
+   */
+  simdjson_inline simdjson_result<json_type> type() const noexcept;
+
+  /**
+   * @addtogroup object Object iteration
+   *
+   * Methods to iterate and find object fields. These methods generally *assume* the value is
+   * actually an object; the caller is responsible for keeping track of that fact.
+   *
+   * @{
+   */
+
+  /**
+   * Start an object iteration.
+   *
+   * @returns Whether the object had any fields (returns false for empty).
+   * @error INCORRECT_TYPE if there is no opening {
+   */
+  simdjson_warn_unused simdjson_inline simdjson_result<bool> start_object() noexcept;
+  /**
+   * Start an object iteration from the root.
+   *
+   * @returns Whether the object had any fields (returns false for empty).
+   * @error INCORRECT_TYPE if there is no opening {
+   * @error TAPE_ERROR if there is no matching } at end of document
+   */
+  simdjson_warn_unused simdjson_inline simdjson_result<bool> start_root_object() noexcept;
+  /**
+   * Checks whether an object could be started from the root. May be called by start_root_object.
+   *
+   * @returns SUCCESS if it is possible to safely start an object from the root (document level).
+   * @error INCORRECT_TYPE if there is no opening {
+   * @error TAPE_ERROR if there is no matching } at end of document
+   */
+  simdjson_warn_unused simdjson_inline error_code check_root_object() noexcept;
+  /**
+   * Start an object iteration after the user has already checked and moved past the {.
+   *
+   * Does not move the iterator unless the object is empty ({}).
+   *
+   * @returns Whether the object had any fields (returns false for empty).
+   * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent*
+   *        array or object is incomplete).
+   */
+  simdjson_warn_unused simdjson_inline simdjson_result<bool> started_object() noexcept;
+  /**
+   * Start an object iteration from the root, after the user has already checked and moved past the {.
+   *
+   * Does not move the iterator unless the object is empty ({}).
+   *
+   * @returns Whether the object had any fields (returns false for empty).
+   * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent*
+   *        array or object is incomplete).
+   */
+  simdjson_warn_unused simdjson_inline simdjson_result<bool> started_root_object() noexcept;
+
+  /**
+   * Moves to the next field in an object.
+   *
+   * Looks for , and }. If } is found, the object is finished and the iterator advances past it.
+   * Otherwise, it advances to the next value.
+   *
+   * @return whether there is another field in the object.
+   * @error TAPE_ERROR If there is a comma missing between fields.
+   * @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value.
+   */
+  simdjson_warn_unused simdjson_inline simdjson_result<bool> has_next_field() noexcept;
+
+  /**
+   * Get the current field's key.
+   */
+  simdjson_warn_unused simdjson_inline simdjson_result<raw_json_string> field_key() noexcept;
+
+  /**
+   * Pass the : in the field and move to its value.
+   */
+  simdjson_warn_unused simdjson_inline error_code field_value() noexcept;
+
+  /**
+   * Find the next field with the given key.
+   *
+   * Assumes you have called next_field() or otherwise matched the previous value.
+   *
+   * This means the iterator must be sitting at the next key:
+   *
+   * ```
+   * { "a": 1, "b": 2 }
+   *           ^
+   * ```
+   *
+   * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to
+   * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may
+   * fail to match some keys with escapes (\u, \n, etc.).
+   */
+  simdjson_warn_unused simdjson_inline error_code find_field(const std::string_view key) noexcept;
+
+  /**
+   * Find the next field with the given key, *without* unescaping. This assumes object order: it
+   * will not find the field if it was already passed when looking for some *other* field.
+   *
+   * Assumes you have called next_field() or otherwise matched the previous value.
+   *
+   * This means the iterator must be sitting at the next key:
+   *
+   * ```
+   * { "a": 1, "b": 2 }
+   *           ^
+   * ```
+   *
+   * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to
+   * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may
+   * fail to match some keys with escapes (\u, \n, etc.).
+   */
+  simdjson_warn_unused simdjson_inline simdjson_result<bool> find_field_raw(const std::string_view key) noexcept;
+
+  /**
+   * Find the field with the given key without regard to order, and *without* unescaping.
+   *
+   * This is an unordered object lookup: if the field is not found initially, it will cycle around and scan from the beginning.
+   *
+   * Assumes you have called next_field() or otherwise matched the previous value.
+   *
+   * This means the iterator must be sitting at the next key:
+   *
+   * ```
+   * { "a": 1, "b": 2 }
+   *           ^
+   * ```
+   *
+   * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to
+   * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may
+   * fail to match some keys with escapes (\u, \n, etc.).
+   */
+  simdjson_warn_unused simdjson_inline simdjson_result<bool> find_field_unordered_raw(const std::string_view key) noexcept;
+
+  /** @} */
+
+  /**
+   * @addtogroup array Array iteration
+   * Methods to iterate over array elements. These methods generally *assume* the value is actually
+   * an array; the caller is responsible for keeping track of that fact.
+   * @{
+   */
+
+  /**
+   * Check for an opening [ and start an array iteration.
+   *
+   * @returns Whether the array had any elements (returns false for empty).
+   * @error INCORRECT_TYPE If there is no [.
+   */
+  simdjson_warn_unused simdjson_inline simdjson_result<bool> start_array() noexcept;
+  /**
+   * Check for an opening [ and start an array iteration while at the root.
+   *
+   * @returns Whether the array had any elements (returns false for empty).
+   * @error INCORRECT_TYPE If there is no [.
+   * @error TAPE_ERROR if there is no matching ] at end of document
+   */
+  simdjson_warn_unused simdjson_inline simdjson_result<bool> start_root_array() noexcept;
+  /**
+   * Checks whether an array could be started from the root. May be called by start_root_array.
+   *
+   * @returns SUCCESS if it is possible to safely start an array from the root (document level).
+   * @error INCORRECT_TYPE If there is no [.
+   * @error TAPE_ERROR if there is no matching ] at end of document
+   */
+  simdjson_warn_unused simdjson_inline error_code check_root_array() noexcept;
+  /**
+   * Start an array iteration, after the user has already checked and moved past the [.
+   *
+   * Does not move the iterator unless the array is empty ([]).
+   *
+   * @returns Whether the array had any elements (returns false for empty).
+   * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent*
+   *        array or object is incomplete).
+   */
+  simdjson_warn_unused simdjson_inline simdjson_result<bool> started_array() noexcept;
+  /**
+   * Start an array iteration from the root, after the user has already checked and moved past the [.
+   *
+   * Does not move the iterator unless the array is empty ([]).
+   *
+   * @returns Whether the array had any elements (returns false for empty).
+   * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent*
+   *        array or object is incomplete).
+   */
+  simdjson_warn_unused simdjson_inline simdjson_result<bool> started_root_array() noexcept;
+
+  /**
+   * Moves to the next element in an array.
+   *
+   * Looks for , and ]. If ] is found, the array is finished and the iterator advances past it.
+   * Otherwise, it advances to the next value.
+   *
+   * @return Whether there is another element in the array.
+   * @error TAPE_ERROR If there is a comma missing between elements.
+   */
+  simdjson_warn_unused simdjson_inline simdjson_result<bool> has_next_element() noexcept;
+
+  /**
+   * Get a child value iterator.
+   */
+  simdjson_warn_unused simdjson_inline value_iterator child() const noexcept;
+
+  /** @} */
+
+  /**
+   * @defgroup scalar Scalar values
+   * @addtogroup scalar
+   * @{
+   */
+
+  simdjson_warn_unused simdjson_inline simdjson_result<std::string_view> get_string(bool allow_replacement) noexcept;
+  template <typename string_type>
+  simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement) noexcept;
+  simdjson_warn_unused simdjson_inline simdjson_result<std::string_view> get_wobbly_string() noexcept;
+  simdjson_warn_unused simdjson_inline simdjson_result<raw_json_string> get_raw_json_string() noexcept;
+  simdjson_warn_unused simdjson_inline simdjson_result<uint64_t> get_uint64() noexcept;
+  simdjson_warn_unused simdjson_inline simdjson_result<uint64_t> get_uint64_in_string() noexcept;
+  simdjson_warn_unused simdjson_inline simdjson_result<int64_t> get_int64() noexcept;
+  simdjson_warn_unused simdjson_inline simdjson_result<int64_t> get_int64_in_string() noexcept;
+  simdjson_warn_unused simdjson_inline simdjson_result<double> get_double() noexcept;
+  simdjson_warn_unused simdjson_inline simdjson_result<double> get_double_in_string() noexcept;
+  simdjson_warn_unused simdjson_inline simdjson_result<bool> get_bool() noexcept;
+  simdjson_warn_unused simdjson_inline simdjson_result<bool> is_null() noexcept;
+  simdjson_warn_unused simdjson_inline bool is_negative() noexcept;
+  simdjson_warn_unused simdjson_inline simdjson_result<bool> is_integer() noexcept;
+  simdjson_warn_unused simdjson_inline simdjson_result<number_type> get_number_type() noexcept;
+  simdjson_warn_unused simdjson_inline simdjson_result<number> get_number() noexcept;
+
+  simdjson_warn_unused simdjson_inline simdjson_result<std::string_view> get_root_string(bool check_trailing, bool allow_replacement) noexcept;
+  template <typename string_type>
+  simdjson_warn_unused simdjson_inline error_code get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept;
+  simdjson_warn_unused simdjson_inline simdjson_result<std::string_view> get_root_wobbly_string(bool check_trailing) noexcept;
+  simdjson_warn_unused simdjson_inline simdjson_result<raw_json_string> get_root_raw_json_string(bool check_trailing) noexcept;
+  simdjson_warn_unused simdjson_inline simdjson_result<uint64_t> get_root_uint64(bool check_trailing) noexcept;
+  simdjson_warn_unused simdjson_inline simdjson_result<uint64_t> get_root_uint64_in_string(bool check_trailing) noexcept;
+  simdjson_warn_unused simdjson_inline simdjson_result<int64_t> get_root_int64(bool check_trailing) noexcept;
+  simdjson_warn_unused simdjson_inline simdjson_result<int64_t> get_root_int64_in_string(bool check_trailing) noexcept;
+  simdjson_warn_unused simdjson_inline simdjson_result<double> get_root_double(bool check_trailing) noexcept;
+  simdjson_warn_unused simdjson_inline simdjson_result<double> get_root_double_in_string(bool check_trailing) noexcept;
+  simdjson_warn_unused simdjson_inline simdjson_result<bool> get_root_bool(bool check_trailing) noexcept;
+  simdjson_warn_unused simdjson_inline bool is_root_negative() noexcept;
+  simdjson_warn_unused simdjson_inline simdjson_result<bool> is_root_integer(bool check_trailing) noexcept;
+  simdjson_warn_unused simdjson_inline simdjson_result<number_type> get_root_number_type(bool check_trailing) noexcept;
+  simdjson_warn_unused simdjson_inline simdjson_result<number> get_root_number(bool check_trailing) noexcept;
+  simdjson_warn_unused simdjson_inline simdjson_result<bool> is_root_null(bool check_trailing) noexcept;
+
+  simdjson_inline error_code error() const noexcept;
+  simdjson_inline uint8_t *&string_buf_loc() noexcept;
+  simdjson_inline const json_iterator &json_iter() const noexcept;
+  simdjson_inline json_iterator &json_iter() noexcept;
+
+  simdjson_inline void assert_is_valid() const noexcept;
+  simdjson_inline bool is_valid() const noexcept;
+
+  /** @} */
+protected:
+  /**
+   * Restarts an array iteration.
+   * @returns Whether the array has any elements (returns false for empty).
+   */
+  simdjson_inline simdjson_result<bool> reset_array() noexcept;
+  /**
+   * Restarts an object iteration.
+   * @returns Whether the object has any fields (returns false for empty).
+   */
+  simdjson_inline simdjson_result<bool> reset_object() noexcept;
+  /**
+   * move_at_start(): moves us so that we are pointing at the beginning of
+   * the container. It updates the index so that at_start() is true and it
+   * syncs the depth. The user can then create a new container instance.
+   *
+   * Usage: used with value::count_elements().
+   **/
+  simdjson_inline void move_at_start() noexcept;
+
+  /**
+   * move_at_container_start(): moves us so that we are pointing at the beginning of
+   * the container so that assert_at_container_start() passes.
+   *
+   * Usage: used with reset_array() and reset_object().
+   **/
+  simdjson_inline void move_at_container_start() noexcept;
+  /* Useful for debugging and logging purposes. */
+  inline std::string to_string() const noexcept;
+  simdjson_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept;
+
+  simdjson_inline simdjson_result<bool> parse_null(const uint8_t *json) const noexcept;
+  simdjson_inline simdjson_result<bool> parse_bool(const uint8_t *json) const noexcept;
+  simdjson_inline const uint8_t *peek_start() const noexcept;
+  simdjson_inline uint32_t peek_start_length() const noexcept;
+  simdjson_inline uint32_t peek_root_length() const noexcept;
+
+  /**
+   * The general idea of the advance_... methods and the peek_* methods
+   * is that you first peek and check that you have desired type. If you do,
+   * and only if you do, then you advance.
+   *
+   * We used to unconditionally advance. But this made reasoning about our
+   * current state difficult.
+   * Suppose you always advance. Look at the 'value' matching the key
+   * "shadowable" in the following example...
+   *
+   * ({"globals":{"a":{"shadowable":[}}}})
+   *
+   * If the user thinks it is a Boolean and asks for it, then we check the '[',
+   * decide it is not a Boolean, but still move into the next character ('}'). Now
+   * we are left pointing at '}' right after a '['. And we have not yet reported
+   * an error, only that we do not have a Boolean.
+   *
+   * If, instead, you just stand your ground until it is content that you know, then
+   * you will only even move beyond the '[' if the user tells you that you have an
+   * array. So you will be at the '}' character inside the array and, hopefully, you
+   * will then catch the error because an array cannot start with '}', but the code
+   * processing Boolean values does not know this.
+   *
+   * So the contract is: first call 'peek_...' and then call 'advance_...' only
+   * if you have determined that it is a type you can handle.
+   *
+   * Unfortunately, it makes the code more verbose, longer and maybe more error prone.
+   */
+
+  simdjson_inline void advance_scalar(const char *type) noexcept;
+  simdjson_inline void advance_root_scalar(const char *type) noexcept;
+  simdjson_inline void advance_non_root_scalar(const char *type) noexcept;
+
+  simdjson_inline const uint8_t *peek_scalar(const char *type) noexcept;
+  simdjson_inline const uint8_t *peek_root_scalar(const char *type) noexcept;
+  simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept;
+
+
+  simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept;
+  simdjson_inline error_code end_container() noexcept;
+
+  /**
+   * Advance to a place expecting a value (increasing depth).
+   *
+   * @return The current token (the one left behind).
+   * @error TAPE_ERROR If the document ended early.
+   */
+  simdjson_inline simdjson_result<const uint8_t *> advance_to_value() noexcept;
+
+  simdjson_inline error_code incorrect_type_error(const char *message) const noexcept;
+  simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept;
+
+  simdjson_inline bool is_at_start() const noexcept;
+  /**
+   * is_at_iterator_start() returns true on an array or object after it has just been
+   * created, whether the instance is empty or not.
+ * + * Usage: used by array::begin() in debug mode (SIMDJSON_DEVELOPMENT_CHECKS) + */ + simdjson_inline bool is_at_iterator_start() const noexcept; + + /** + * Assuming that we are within an object, this returns true if we + * are pointing at a key. + * + * Usage: the skip_child() method should never be used while we are pointing + * at a key inside an object. + */ + simdjson_inline bool is_at_key() const noexcept; + + inline void assert_at_start() const noexcept; + inline void assert_at_container_start() const noexcept; + inline void assert_at_root() const noexcept; + inline void assert_at_child() const noexcept; + inline void assert_at_next() const noexcept; + inline void assert_at_non_root_start() const noexcept; + + /** Get the starting position of this value */ + simdjson_inline token_position start_position() const noexcept; + + /** @copydoc error_code json_iterator::position() const noexcept; */ + simdjson_inline token_position position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position last_position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position end_position() const noexcept; + /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + friend class document; + friend class object; + friend class array; + friend class value; + friend class field; +}; // value_iterator + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::value_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline 
simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H +/* end file simdjson/generic/ondemand/value_iterator.h for fallback */ +/* including simdjson/generic/ondemand/value.h for fallback: #include "simdjson/generic/ondemand/value.h" */ +/* begin file simdjson/generic/ondemand/value.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * An ephemeral JSON value returned during iteration. It is only valid for as long as you do + * not access more data in the JSON document. + */ +class value { +public: + /** + * Create a new invalid value. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline value() noexcept = default; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. 
+ */ + template simdjson_inline simdjson_result get() noexcept { + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_inline error_code get(T &out) noexcept; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() noexcept; + + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() noexcept; + + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + + /** + * Cast this JSON value (inside string) to a unsigned integer. 
+ * + * @returns A unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64() noexcept; + + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + + /** + * Attempts to fill the provided std::string reference with the parsed value of the current string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. 
+ * + * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. + * We recommend you avoid allocating an std::string unless you need to. + * + * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. + */ + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + + /** + * Cast this JSON value to a "wobbly" string. + * + * The string may not be a valid UTF-8 string. + * See https://simonsapin.github.io/wtf-8/ + * + * Important: a value should be consumed once. Calling get_wobbly_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_inline simdjson_result get_bool() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null. + * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. 
+ */ + simdjson_inline simdjson_result is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.). + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() noexcept(false); + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * @returns An UTF-8 string. 
The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); +#endif + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + * + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * Performance hint: You should only call count_elements() as a last + * resort as it may require scanning the document twice or more. 
+ */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method on the object instance. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. 
+ + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field as not there when they are not in order). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
+ */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @return The type of JSON value (json_type::array, json_type::object, json_type::string, + * json_type::number, json_type::boolean, or json_type::null). + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the value is a scalar (string, number, null, Boolean). + * Returns false when there it is an array or object. + * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + /** + * Checks whether the value is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; + + /** + * Checks whether the value is a negative number. 
+ * + * @returns true if the number is negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the value is an integer number. Note that + * this requires to partially parse the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * Performance note: if you call this function systematically + * before parsing a number, you may have fallen for a performance + * anti-pattern. + * + * @returns true if the number is an integer. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808. + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808. + * get_number_type() is number_type::big_integer for integers that do not fit in 64 bits, + * in which case the digit_count is set to the length of the big integer string. + * Otherwise, get_number_type() has value number_type::floating_point_number. + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number. 
An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * For integers that do not fit in 64 bits, the function returns BIGINT_ERROR error code. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + * + * Performance note: this is designed with performance in mind. When + * calling 'get_number()', you scan the number string only once, determining + * efficiently the type and storing it in an efficient manner. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. 
+ * + * The string_view is *not* null-terminated. However, if this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view is guaranteed to be + * a non-space token. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". + * - -1.2e-100 + * - true + * - false + * - null + * + * See also value::raw_json(). + */ + simdjson_inline std::string_view raw_json_token() noexcept; + + /** + * Get a string_view pointing at this value in the JSON document. + * If this element is an array or an object, it consumes the array or the object + * and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document. + * If this element is a scalar (string, number, Boolean, null), it returns what + * raw_json_token() would return. + */ + simdjson_inline simdjson_result raw_json() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + simdjson_inline simdjson_result current_location() noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. 
+ * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. + * + * Calling at_pointer() on non-document instances (e.g., arrays and objects) is not + * standardized (by RFC 6901). We provide some experimental support for JSON pointers + * on non-document instances. Yet it is not the case when calling at_pointer on an array + * or an object instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. 
+ * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that are trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view at_path) noexcept; + + +protected: + /** + * Create a value. + */ + simdjson_inline value(const value_iterator &iter) noexcept; + + /** + * Skip this value, allowing iteration to continue. + */ + simdjson_inline void skip() noexcept; + + /** + * Start a value at the current position. + * + * (It should already be started; this is just a self-documentation method.) + */ + static simdjson_inline value start(const value_iterator &iter) noexcept; + + /** + * Resume a value. 
+ */ + static simdjson_inline value resume(const value_iterator &iter) noexcept; + + /** + * Get the object, starting or resuming it as necessary + */ + simdjson_inline simdjson_result start_or_resume_object() noexcept; + + // simdjson_inline void log_value(const char *type) const noexcept; + // simdjson_inline void log_error(const char *message) const noexcept; + + value_iterator iter{}; + + friend class document; + friend class array_iterator; + friend class field; + friend class object; + friend struct simdjson_result; + friend struct simdjson_result; + friend class field; +}; + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::value &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result get_array() noexcept; + simdjson_inline simdjson_result get_object() noexcept; + + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() 
noexcept; + + template simdjson_inline error_code get(T &out) noexcept; + +#if SIMDJSON_EXCEPTIONS + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator fallback::ondemand::array() noexcept(false); + simdjson_inline operator fallback::ondemand::object() noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator fallback::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
+ */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field as not there when they are not in order). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
+ */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + */ + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + + /** @copydoc simdjson_inline std::string_view value::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + + /** @copydoc simdjson_inline simdjson_result current_location() noexcept */ + simdjson_inline simdjson_result current_location() noexcept; + /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + +} // namespace simdjson + +#endif 
// SIMDJSON_GENERIC_ONDEMAND_VALUE_H +/* end file simdjson/generic/ondemand/value.h for fallback */ +/* including simdjson/generic/ondemand/logger.h for fallback: #include "simdjson/generic/ondemand/logger.h" */ +/* begin file simdjson/generic/ondemand/logger.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +// Logging should be free unless SIMDJSON_VERBOSE_LOGGING is set. Importantly, it is critical +// that the call to the log functions be side-effect free. Thus, for example, you should not +// create temporary std::string instances. +namespace logger { + +enum class log_level : int32_t { + info = 0, + error = 1 +}; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + +// We do not want these functions to be 'really inlined' since real inlining is +// for performance purposes and if you are using the loggers, you do not care about +// performance (or should not). +static inline void log_headers() noexcept; +// If args are provided, title will be treated as format string +template +static inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +template +static inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... 
args) noexcept; +static inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; + +static inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail="") noexcept; +static inline void log_error(const json_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +static inline void log_event(const value_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const value_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_error(const value_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +} // namespace logger +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_H +/* end file 
simdjson/generic/ondemand/logger.h for fallback */ +/* including simdjson/generic/ondemand/token_iterator.h for fallback: #include "simdjson/generic/ondemand/token_iterator.h" */ +/* begin file simdjson/generic/ondemand/token_iterator.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * Iterates through JSON tokens (`{` `}` `[` `]` `,` `:` `""` `123` `true` `false` `null`) + * detected by stage 1. + * + * @private This is not intended for external use. + */ +class token_iterator { +public: + /** + * Create a new invalid token_iterator. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline token_iterator() noexcept = default; + simdjson_inline token_iterator(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator &operator=(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator(const token_iterator &other) noexcept = default; + simdjson_inline token_iterator &operator=(const token_iterator &other) noexcept = default; + + /** + * Advance to the next token (returning the current one). + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + /** + * Reports the current offset in bytes from the start of the underlying buffer. 
+ */ + simdjson_inline uint32_t current_offset() const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. + * + */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a root token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token (start of the document). + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; + /** + * Return the current index. + */ + simdjson_inline token_position position() const noexcept; + /** + * Reset to a previously saved index. 
+ */ + simdjson_inline void set_position(token_position target_position) noexcept; + + // NOTE: we don't support a full C++ iterator interface, because we expect people to make + // different calls to advance the iterator based on *their own* state. + + simdjson_inline bool operator==(const token_iterator &other) const noexcept; + simdjson_inline bool operator!=(const token_iterator &other) const noexcept; + simdjson_inline bool operator>(const token_iterator &other) const noexcept; + simdjson_inline bool operator>=(const token_iterator &other) const noexcept; + simdjson_inline bool operator<(const token_iterator &other) const noexcept; + simdjson_inline bool operator<=(const token_iterator &other) const noexcept; + +protected: + simdjson_inline token_iterator(const uint8_t *buf, token_position position) noexcept; + + /** + * Get the index of the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_index(int32_t delta=0) const noexcept; + /** + * Get the index of the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. + * + */ + simdjson_inline uint32_t peek_index(token_position position) const noexcept; + + const uint8_t *buf{}; + token_position _position{}; + + friend class json_iterator; + friend class value_iterator; + friend class object; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... 
args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +}; + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::token_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H +/* end file simdjson/generic/ondemand/token_iterator.h for fallback */ +/* including simdjson/generic/ondemand/json_iterator.h for fallback: #include "simdjson/generic/ondemand/json_iterator.h" */ +/* begin file simdjson/generic/ondemand/json_iterator.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * Iterates through JSON tokens, keeping track of depth and string buffer. + * + * @private This is not intended for external use. 
+ */ +class json_iterator { +protected: + token_iterator token{}; + ondemand::parser *parser{}; + /** + * Next free location in the string buffer. + * + * Used by raw_json_string::unescape() to have a place to unescape strings to. + */ + uint8_t *_string_buf_loc{}; + /** + * JSON error, if there is one. + * + * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. + * + * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first + * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If + * this is not elided, we should make sure it's at least not using up a register. Failing that, + * we should store it in document so there's only one of them. + */ + error_code error{SUCCESS}; + /** + * Depth of the current token in the JSON. + * + * - 0 = finished with document + * - 1 = document root value (could be [ or {, not yet known) + * - 2 = , or } inside root array/object + * - 3 = key or value inside root array/object. + */ + depth_t _depth{}; + /** + * Beginning of the document indexes. + * Normally we have root == parser->implementation->structural_indexes.get() + * but this may differ, especially in streaming mode (where we have several + * documents); + */ + token_position _root{}; + /** + * Normally, a json_iterator operates over a single document, but in + * some cases, we may have a stream of documents. This attribute is meant + * as meta-data: the json_iterator works the same irrespective of the + * value of this attribute. 
+ */ + bool _streaming{false}; + +public: + simdjson_inline json_iterator() noexcept = default; + simdjson_inline json_iterator(json_iterator &&other) noexcept; + simdjson_inline json_iterator &operator=(json_iterator &&other) noexcept; + simdjson_inline explicit json_iterator(const json_iterator &other) noexcept = default; + simdjson_inline json_iterator &operator=(const json_iterator &other) noexcept = default; + /** + * Skips a JSON value, whether it is a scalar, array or object. + */ + simdjson_warn_unused simdjson_inline error_code skip_child(depth_t parent_depth) noexcept; + + /** + * Tell whether the iterator is still at the start + */ + simdjson_inline bool at_root() const noexcept; + + /** + * Tell whether we should be expected to run in streaming + * mode (iterating over many documents). It is pure metadata + * that does not affect how the iterator works. It is used by + * start_root_array() and start_root_object(). + */ + simdjson_inline bool streaming() const noexcept; + + /** + * Get the token position of the document root (a token_position, not an iterator). + */ + simdjson_inline token_position root_position() const noexcept; + /** + * Assert that we are at the document depth (== 1) + */ + simdjson_inline void assert_at_document_depth() const noexcept; + /** + * Assert that we are at the root of the document + */ + simdjson_inline void assert_at_root() const noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is live (has not been moved). + */ + simdjson_inline bool is_alive() const noexcept; + + /** + * Abandon this iterator, setting depth to 0 (as if the document is finished). + */ + simdjson_inline void abandon() noexcept; + + /** + * Advance to the next token without modifying depth, returning the current one. + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + + /** + * Returns true if there is a single token in the index (i.e., it is + * a JSON with a scalar value such as a single number).
+ * + * @return whether there is a single token + */ + simdjson_inline bool is_single_token() const noexcept; + + /** + * Assert that there are at least the given number of tokens left. + * + * Has no effect in release builds. + */ + simdjson_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; + /** + * Assert that the given position addresses an actual token (is within bounds). + * + * Has no effect in release builds. + */ + simdjson_inline void assert_valid_position(token_position position) const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + /** + * Get a pointer to the current location in the input buffer. + * + * This is not null-terminated; it is a view into the JSON. + * + * You may be pointing outside of the input buffer: it is not generally + * safe to dereference this pointer. + */ + simdjson_inline const uint8_t *unsafe_pointer() const noexcept; + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token to retrieve. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ... 
+ */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the token at the given position. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the root token at the given position. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; + /** + * Get the JSON text for the last token in the document. + * + * This is not null-terminated; it is a view into the JSON. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ... + */ + simdjson_inline const uint8_t *peek_last() const noexcept; + + /** + * Ascend one level. + * + * Validates that the depth - 1 == parent_depth. + * + * @param parent_depth the expected parent depth. + */ + simdjson_inline void ascend_to(depth_t parent_depth) noexcept; + + /** + * Descend one level. + * + * Validates that the new depth == child_depth. + * + * @param child_depth the expected child depth. + */ + simdjson_inline void descend_to(depth_t child_depth) noexcept; + simdjson_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; + + /** + * Get current depth. + */ + simdjson_inline depth_t depth() const noexcept; + + /** + * Get current (writeable) location in the string buffer. + */ + simdjson_inline uint8_t *&string_buf_loc() noexcept; + + /** + * Report an unrecoverable error, preventing further iteration. + * + * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error.
+ */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + /** + * Log error, but don't stop iteration. + * @param error The error to report. Must be INCORRECT_TYPE or NO_SUCH_FIELD. + * @param message An error message to report with the error. + */ + simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; + + /** + * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with + * N bytes of capacity. It will return false if N is too small (smaller than max_len) or if it is zero. + * The buffer (tmpbuf) is padded with space characters. + */ + simdjson_warn_unused simdjson_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept; + + simdjson_inline token_position position() const noexcept; + /** + * Write the raw_json_string to the string buffer and return a string_view. + * Each raw_json_string should be unescaped once, or else the string buffer might + * overflow. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, bool allow_replacement) noexcept; + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in) noexcept; + + simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; + + simdjson_inline error_code consume_character(char c) noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + simdjson_inline token_position start_position(depth_t depth) const noexcept; + simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; +#endif + + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() const noexcept; + + /** + * Updates this json iterator so that it is back at the beginning of the document, + * as if it had just been created.
+ */ + inline void rewind() noexcept; + /** + * This checks whether the {,},[,] are balanced so that the document + * ends with proper zero depth. This requires scanning the whole document + * and it may be expensive. It is expected that it will be rarely called. + * It does not attempt to match { with } and [ with ]. + */ + inline bool balanced() const noexcept; +protected: + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser, bool streaming) noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + /// The last token before the end + simdjson_inline token_position last_position() const noexcept; + /// The token *at* the end. This points at gibberish and should only be used for comparison. + simdjson_inline token_position end_position() const noexcept; + /// The end of the buffer. + simdjson_inline token_position end() const noexcept; + + friend class document; + friend class document_stream; + friend class object; + friend class array; + friend class value; + friend class raw_json_string; + friend class parser; + friend class value_iterator; + friend class field; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... 
args) noexcept; +}; // json_iterator + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::json_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H +/* end file simdjson/generic/ondemand/json_iterator.h for fallback */ +/* including simdjson/generic/ondemand/json_type.h for fallback: #include "simdjson/generic/ondemand/json_type.h" */ +/* begin file simdjson/generic/ondemand/json_type.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * The type of a JSON value. + */ +enum class json_type { + // Start at 1 to catch uninitialized / default values more easily + array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) + object, ///< A JSON object ( { "a": 1, "b": 2, ... } ) + number, ///< A JSON number ( 1 or -2.3 or 4.5e6 ...) + string, ///< A JSON string ( "a" or "hello world\n" ...) + boolean, ///< A JSON boolean (true or false) + null ///< A JSON null (null) +}; + +/** + * A type representing a JSON number.
+ * The design of the struct is deliberately straight-forward. All + * functions return standard values with no error check. + */ +struct number { + + /** + * return the automatically determined type of + * the number: number_type::floating_point_number, + * number_type::signed_integer or number_type::unsigned_integer. + * + * enum class number_type { + * floating_point_number=1, /// a binary64 number + * signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + * unsigned_integer /// a positive integer larger or equal to 1<<63 + * }; + */ + simdjson_inline ondemand::number_type get_number_type() const noexcept; + /** + * return true if the automatically determined type of + * the number is number_type::unsigned_integer. + */ + simdjson_inline bool is_uint64() const noexcept; + /** + * return the value as a uint64_t, only valid if is_uint64() is true. + */ + simdjson_inline uint64_t get_uint64() const noexcept; + simdjson_inline operator uint64_t() const noexcept; + + /** + * return true if the automatically determined type of + * the number is number_type::signed_integer. + */ + simdjson_inline bool is_int64() const noexcept; + /** + * return the value as a int64_t, only valid if is_int64() is true. + */ + simdjson_inline int64_t get_int64() const noexcept; + simdjson_inline operator int64_t() const noexcept; + + + /** + * return true if the automatically determined type of + * the number is number_type::floating_point_number. + */ + simdjson_inline bool is_double() const noexcept; + /** + * return the value as a double, only valid if is_double() is true. + */ + simdjson_inline double get_double() const noexcept; + simdjson_inline operator double() const noexcept; + + /** + * Convert the number to a double. Though it always succeed, the conversion + * may be lossy if the number cannot be represented exactly. 
+ */ + simdjson_inline double as_double() const noexcept; + + +protected: + /** + * The next block of declarations is designed so that we can call the number parsing + * functions on a number type. They are protected and should never be used outside + * of the core simdjson library. + */ + friend class value_iterator; + template + friend error_code numberparsing::write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer); + template + friend error_code numberparsing::parse_number(const uint8_t *const src, W &writer); + /** Store a signed 64-bit value to the number. */ + simdjson_inline void append_s64(int64_t value) noexcept; + /** Store an unsigned 64-bit value to the number. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + /** Store a double value to the number. */ + simdjson_inline void append_double(double value) noexcept; + /** Specifies that the value is a double, but leaves it undefined. */ + simdjson_inline void skip_double() noexcept; + /** + * End of friend declarations. + */ + + /** + * Our attributes are a union type (size = 64 bits) + * followed by a type indicator. + */ + union { + double floating_point_number; + int64_t signed_integer; + uint64_t unsigned_integer; + } payload{0}; + number_type type{number_type::signed_integer}; +}; + +/** + * Write the JSON type to the output stream + * + * @param out The output stream. + * @param type The json_type. + */ +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept; + +#if SIMDJSON_EXCEPTIONS +/** + * Send JSON type to an output stream. + * + * @param out The output stream. + * @param type The json_type. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown).
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false); +#endif + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::json_type &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H +/* end file simdjson/generic/ondemand/json_type.h for fallback */ +/* including simdjson/generic/ondemand/raw_json_string.h for fallback: #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * A string escaped per JSON rules, terminated with quote ("). They are used to represent + * unescaped keys inside JSON documents. + * + * (In other words, a pointer to the beginning of a string, just after the start quote, inside a + * JSON file.) + * + * This class is deliberately simplistic and has little functionality. You can + * compare a raw_json_string instance with an unescaped C string, but + * that is nearly all you can do. 
+ * + * The raw_json_string is unescaped. If you wish to write an unescaped version of it to your own + * buffer, you may do so using the parser.unescape(string, buff) method, using an ondemand::parser + * instance. Doing so requires you to have a sufficiently large buffer. + * + * The raw_json_string instances originate typically from field instance which in turn represent + * key-value pairs from object instances. From a field instance, you get the raw_json_string + * instance by calling key(). You can, if you want a more usable string_view instance, call + * the unescaped_key() method on the field instance. You may also create a raw_json_string from + * any other string value, with the value.get_raw_json_string() method. Again, you can get + * a more usable string_view instance by calling get_string(). + * + */ +class raw_json_string { +public: + /** + * Create a new invalid raw_json_string. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline raw_json_string() noexcept = default; + + /** + * Create a new invalid raw_json_string pointed at the given location in the JSON. + * + * The given location must be just *after* the beginning quote (") in the JSON file. + * + * It *must* be terminated by a ", and be a valid JSON string. + */ + simdjson_inline raw_json_string(const uint8_t * _buf) noexcept; + /** + * Get the raw pointer to the beginning of the string in the JSON (just after the "). + * + * It is possible for this function to return a null pointer if the instance + * has outlived its existence. + */ + simdjson_inline const char * raw() const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done) on target.size() characters, + * and if the raw_json_string instance has a quote character at byte index target.size(). + * We never read more than length + 1 bytes in the raw_json_string instance. 
+ * If length is smaller than target.size(), this will return false. + * + * The std::string_view instance may contain any characters. However, the caller + * is responsible for setting length so that length bytes may be read in the + * raw_json_string. + * + * Performance: the comparison may be done using memcmp which may be efficient + * for long strings. + */ + simdjson_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The std::string_view instance should not contain unescaped quote characters: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * Performance: the comparison is done byte-by-byte which might be inefficient for + * long strings. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... + * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The provided C string should not contain an unescaped quote character: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... 
+ * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(const char* target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(const char* target) const noexcept; + + /** + * Returns true if target is free from unescaped quote. If target is known at + * compile-time, we might expect the computation to happen at compile time with + * many compilers (not all!). + */ + static simdjson_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; + static simdjson_inline bool is_free_from_unescaped_quote(const char* target) noexcept; + +private: + + + /** + * This will set the inner pointer to zero, effectively making + * this instance unusable. + */ + simdjson_inline void consume() noexcept { buf = nullptr; } + + /** + * Checks whether the inner pointer is non-null and thus usable. + */ + simdjson_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result will be a valid UTF-8. + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + * @param allow_replacement Whether we allow replacement of invalid surrogate pairs. 
+ */ + simdjson_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter, bool allow_replacement) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result may not be a valid UTF-8. https://simonsapin.github.io/wtf-8/ + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + */ + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(json_iterator &iter) const noexcept; + const uint8_t * buf{}; + friend class object; + friend class field; + friend class parser; + friend struct simdjson_result; +}; + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; + +/** + * Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible + * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings. 
+ */ +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept; +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept; + + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::raw_json_string &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private + + simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape(fallback::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(fallback::ondemand::json_iterator &iter) const noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H +/* end file simdjson/generic/ondemand/raw_json_string.h for fallback */ +/* including simdjson/generic/ondemand/parser.h for fallback: #include "simdjson/generic/ondemand/parser.h" */ +/* begin file simdjson/generic/ondemand/parser.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * The default batch size for document_stream instances for this On Demand kernel. + * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * in the future. + */ +static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; +/** + * Some adversary might try to set the batch size to 0 or 1, which might cause problems. + * We set a minimum of 32B since anything else is highly likely to be an error. In practice, + * most users will want a much larger batch size. + * + * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON + * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. + */ +static constexpr size_t MINIMAL_BATCH_SIZE = 32; + +/** + * A JSON fragment iterator. + * + * This holds the actual iterator as well as the buffer for writing strings. + */ +class parser { +public: + /** + * Create a JSON parser. + * + * The new parser will have zero capacity. + */ + inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; + + inline parser(parser &&other) noexcept = default; + simdjson_inline parser(const parser &other) = delete; + simdjson_inline parser &operator=(const parser &other) = delete; + simdjson_inline parser &operator=(parser &&other) noexcept = default; + + /** Deallocate the JSON parser. */ + inline ~parser() noexcept = default; + + /** + * Start iterating an on-demand JSON document. + * + * ondemand::parser parser; + * document doc = parser.iterate(json); + * + * It is expected that the content is a valid UTF-8 file, containing a valid JSON document. + * Otherwise the iterate method may return an error. 
In particular, the whole input should be + * valid: we do not attempt to tolerate incorrect content either before or after a JSON + * document. If there is a UTF-8 BOM, the parser skips it. + * + * ### IMPORTANT: Validate what you use + * + * Calling iterate on an invalid JSON document may not immediately trigger an error. The call to + * iterate does not parse and validate the whole document. + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call parse() again or destroy the parser. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * @param len The length of the JSON. + * @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). + * + * @return The document, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document.
+ */ + simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_result iterate_allow_incomplete_json(padded_string_view json) & noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const uint8_t *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(padded_string &&json) & noexcept = delete; + + /** + * @private + * + * Start iterating an on-demand JSON document. 
+ * + * ondemand::parser parser; + * json_iterator doc = parser.iterate_raw(json); + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call parse() again or destroy the parser. + * + * The ondemand::document instance holds the iterator. The document must remain in scope + * while you are accessing instances of ondemand::value, ondemand::object, ondemand::array. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * + * @return The iterator, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document. + */ + simdjson_warn_unused simdjson_result iterate_raw(padded_string_view json) & noexcept; + + + /** + * Parse a buffer containing many JSON documents.
+ * + * auto json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"_padded; + * ondemand::parser parser; + * ondemand::document_stream docs = parser.iterate_many(json); + * for (auto & doc : docs) { + * std::cout << doc["foo"] << std::endl; + * } + * // Prints 1 2 3 + * + * No copy of the input buffer is made. + * + * The function is lazy: it may be that no more than one JSON document at a time is parsed. + * + * The caller is responsible for ensuring that the input string data remains unchanged and is + * not deleted during the loop. + * + * ### Format + * + * The buffer must contain a series of one or more JSON documents, concatenated into a single + * buffer, separated by ASCII whitespace. It effectively parses until it has a fully valid document, + * then starts parsing the next document at that point. (It does this with more parallelism and + * lookahead than you might think, though.) + * + * Documents that consist of an object or array may omit the whitespace between them, concatenating + * with no separator. Documents that consist of a single primitive (i.e. documents that are not + * arrays or objects) MUST be separated with ASCII whitespace. + * + * The characters inside a JSON document, and between JSON documents, must be valid Unicode (UTF-8). + * If there is a UTF-8 BOM, the parser skips it. + * + * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. + * Setting batch_size to excessively large or excessively small values may negatively impact + * performance. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings.
+ * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * + * ### Parser Capacity + * + * If the parser's current capacity is less than batch_size, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param buf The concatenated JSON to parse. + * @param len The length of the concatenated JSON. + * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet + * spot is cache-related: small enough to fit in cache, yet big enough to + * parse as many documents as possible in one tight loop. + * Defaults to DEFAULT_BATCH_SIZE (1MB), which has been a reasonable sweet spot in our tests. + * @param allow_comma_separated (defaults to false) This allows a mode where the documents are + * separated by commas instead of whitespace. It comes with a performance + * penalty because the entire document is indexed at once (and the document must be + * less than 4 GB), and there is no multithreading. In this mode, the batch_size parameter + * is effectively ignored, as it is set to at least the document size. + * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: + * - MEMALLOC if the parser does not have enough capacity and memory allocation fails + * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. + * - other json errors if parsing fails. You should not rely on these errors to always be the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
+ */ + inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + + /** @private We do not want to allow implicit conversion from C string to std::string. */ + simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; + + /** The capacity of this parser (the largest document it can process). */ + simdjson_inline size_t capacity() const noexcept; + /** The maximum capacity of this parser (the largest document it is allowed to process). */ + simdjson_inline size_t max_capacity() const noexcept; + simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; + /** + * The maximum depth of this parser (the most deeply nested objects and arrays it can process). + * This parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. 
+ */ + simdjson_inline size_t max_depth() const noexcept; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * The max_depth parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return The error, if there is one. + */ + simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept; + + #ifdef SIMDJSON_THREADS_ENABLED + /** + * The parser instance can use threads when they are available to speed up some + * operations. It is enabled by default. Changing this attribute will change the + * behavior of the parser for future operations. + */ + bool threaded{true}; + #endif + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result must be valid UTF-8. + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. 
+ * @param allow_replacement Whether we allow a replacement if the input string contains unmatched surrogate pairs. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement = false) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result may not be valid UTF-8. See https://simonsapin.github.io/wtf-8/ + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. 
+ */ + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; + +private: + /** @private [for benchmarking access] The implementation to use */ + std::unique_ptr implementation{}; + size_t _capacity{0}; + size_t _max_capacity; + size_t _max_depth{DEFAULT_MAX_DEPTH}; + std::unique_ptr string_buf{}; +#if SIMDJSON_DEVELOPMENT_CHECKS + std::unique_ptr start_positions{}; +#endif + + friend class json_iterator; + friend class document_stream; +}; + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::parser &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H +/* end file simdjson/generic/ondemand/parser.h for fallback */ + +// All other declarations +/* including simdjson/generic/ondemand/array.h for fallback: #include "simdjson/generic/ondemand/array.h" */ +/* begin file simdjson/generic/ondemand/array.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * A forward-only JSON array. 
+ */ +class array { +public: + /** + * Create a new invalid array. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline array() noexcept = default; + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an array is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the beginning of the array and checks whether the + * array is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result is_empty() & noexcept; + /** + * Reset the iterator so that we are pointing back at the + * beginning of the array. You should still consume values only once even if you + * can iterate through the array more than once. If you unescape a string + * within the array more than once, you have unsafe code. 
Note that rewinding + * an array means that you may need to reparse it anew: it is not a free + * operation. + * + * @returns true if the array contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/0/foo/a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an array + * instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. 
We only support + * JSONPath queries that are trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Consumes the array and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + + /** + * Get the value at the given index. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; +protected: + /** + * Go to the end of the array, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + + /** + * Begin array iteration. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + */ + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + /** + * Begin array iteration from the root. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + * @error TAPE_ERROR if there is no closing ] at the end of the document.
+ */ + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + /** + * Begin array iteration. + * + * This version of the method should be called after the initial [ has been verified, and is + * intended for use by switch statements that check the type of a value. + * + * @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array. + */ + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + + /** + * Create an array from the given iterator. Internal array creation: call array::start() or array::started() instead of this. + * + * @param iter The iterator. Must either be at the start of the first element with iter.is_alive() + * == true, or past the [] with is_alive() == false if the array is empty. Will be *moved* + * into the resulting array. + */ + simdjson_inline array(const value_iterator &iter) noexcept; + + /** + * Iterator marking current position. + * + * iter.is_alive() == false indicates iteration is complete.
+ */ + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; + friend struct simdjson_result; + friend class array_iterator; +}; + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::array &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + inline simdjson_result count_elements() & noexcept; + inline simdjson_result is_empty() & noexcept; + inline simdjson_result reset() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_H +/* end file simdjson/generic/ondemand/array.h for fallback */ +/* including simdjson/generic/ondemand/array_iterator.h for fallback: #include "simdjson/generic/ondemand/array_iterator.h" */ +/* begin file simdjson/generic/ondemand/array_iterator.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * A forward-only JSON array. + * + * This is an input_iterator, meaning: + * - It is forward-only + * - * must be called exactly once per element. + * - ++ must be called exactly once in between each * (*, ++, *, ++, * ...) + */ +class array_iterator { +public: + /** Create a new, invalid array iterator. */ + simdjson_inline array_iterator() noexcept = default; + + // + // Iterator interface + // + + /** + * Get the current element. + * + * Part of the std::iterator interface. + */ + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + /** + * Check if we are at the end of the JSON. + * + * Part of the std::iterator interface. + * + * @return true if there are no more elements in the JSON array. + */ + simdjson_inline bool operator==(const array_iterator &) const noexcept; + /** + * Check if there are more elements in the JSON array. + * + * Part of the std::iterator interface. + * + * @return true if there are more elements in the JSON array. + */ + simdjson_inline bool operator!=(const array_iterator &) const noexcept; + /** + * Move to the next element. + * + * Part of the std::iterator interface. 
+ */ + simdjson_inline array_iterator &operator++() noexcept; + +private: + value_iterator iter{}; + + simdjson_inline array_iterator(const value_iterator &iter) noexcept; + + friend class array; + friend class value; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::array_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H +/* end file simdjson/generic/ondemand/array_iterator.h for fallback */ +/* including simdjson/generic/ondemand/document.h for fallback: #include "simdjson/generic/ondemand/document.h" */ +/* begin file simdjson/generic/ondemand/document.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * A JSON document. It holds a json_iterator instance. 
+ * + * Used by tokens to get text, and string buffer location. + * + * You must keep the document around during iteration. + */ +class document { +public: + /** + * Create a new invalid document. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline document() noexcept = default; + simdjson_inline document(const document &other) noexcept = delete; // pass your documents by reference, not by copy + simdjson_inline document(document &&other) noexcept = default; + simdjson_inline document &operator=(const document &other) noexcept = delete; + simdjson_inline document &operator=(document &&other) noexcept = default; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() & noexcept; + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() & noexcept; + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + /** + * Cast this JSON value (inside string) to an unsigned integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64() noexcept; + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. 
+ * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: Calling get_string() twice on the same document is an error. + * + * @param Whether to allow a replacement character for unmatched surrogate pairs. + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + /** + * Attempts to fill the provided std::string reference with the parsed value of the current string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. + * We recommend you avoid allocating an std::string unless you need to. + * + * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. + */ + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + /** + * Cast this JSON value to a string. + * + * The string is not guaranteed to be valid UTF-8. 
See https://simonsapin.github.io/wtf-8/ + * + * Important: Calling get_wobbly_string() twice on the same document is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_inline simdjson_result get_bool() noexcept; + /** + * Cast this JSON value to a value when the document is an object or an array. + * + * You must not have begun iterating through the object or array. When + * SIMDJSON_DEVELOPMENT_CHECKS is set to 1 (which is the case when building in Debug mode + * by default), and you have already begun iterating, + * you will get an OUT_OF_ORDER_ITERATION error. If you have begun iterating, you can use + * rewind() to reset the document to its initial state before calling this method. + * + * @returns A value if a JSON array or object cannot be found. + * @returns SCALAR_DOCUMENT_AS_VALUE error is the document is a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result get_value() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null. 
+ * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_inline simdjson_result get() & noexcept { + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + } + /** @overload template simdjson_result get() & noexcept */ + template simdjson_inline simdjson_result get() && noexcept { + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." 
+ " You may also add support for custom types, see our documentation."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value + * + * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_inline error_code get(T &out) & noexcept; + /** @overload template error_code get(T &out) & noexcept */ + template simdjson_inline error_code get(T &out) && noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.) + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() noexcept(false); + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() & noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() & noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A signed 64-bit integer. 
+ * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); + /** + * Cast this JSON value to a value when the document is an object or an array. + * + * You must not have begun iterating through the object or array. When + * SIMDJSON_DEVELOPMENT_CHECKS is defined, and you have already begun iterating, + * you will get an OUT_OF_ORDER_ITERATION error. 
If you have begun iterating, you can use + * rewind() to reset the document to its initial state before calling this method. + * + * @returns A value value if a JSON array or object cannot be found. + * @exception SCALAR_DOCUMENT_AS_VALUE error is the document is a scalar (see is_scalar() function). + */ + simdjson_inline operator value() noexcept(false); +#endif + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. 
+ * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) & noexcept; + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to + * a key a single time. Doing object["mykey"].to_string()and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
+ */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field was not there when they are not in order). + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. 
+ * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the document is a scalar (string, number, null, Boolean). + * Returns false when there it is an array or object. + * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + + /** + * Checks whether the document is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; + + /** + * Checks whether the document is a negative number. 
+ * + * @returns true if the number if negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the document is an integer number. Note that + * this requires to partially parse the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * @returns true if the number if negative. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808 and no larger than 18446744073709551615. + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808 and greater or equal to -9223372036854775808. + * get_number_type() is number_type::big_integer if we have an integer outside + * of those ranges (either larger than 18446744073709551615 or smaller than -9223372036854775808). + * Otherwise, get_number_type() has value number_type::floating_point_number + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it is does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number. 
An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. If this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view may be the padded buffer. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". 
+ * - -1.2e-100 + * - true + * - false + * - null + */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + /** + * Reset the iterator inside the document instance so we are pointing back at the + * beginning of the document, as if it had just been created. It invalidates all + * values, objects and arrays that you have created so far (including unescaped strings). + */ + inline void rewind() noexcept; + /** + * Returns debugging information. + */ + inline std::string to_debug_string() noexcept; + /** + * Some unrecoverable error conditions may render the document instance unusable. + * The is_alive() method returns true when the document is still suitable. + */ + inline bool is_alive() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() const noexcept; + + /** + * Returns true if this document has been fully parsed. + * If you have consumed the whole document and at_end() returns + * false, then there may be trailing content. + */ + inline bool at_end() const noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. 
+ * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/\\u00E9") == 123 + * doc.at_pointer((const char*)u8"/\u00E9") returns an error (NO_SUCH_FIELD) + * + * Note that at_pointer() automatically calls rewind between each call. Thus + * all values, objects and arrays that you have created so far (including unescaped strings) + * are invalidated. After calling at_pointer, you need to consume the result: string values + * should be stored in your own variables, arrays should be decoded and stored in your own array-like + * structures and so forth. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. 
We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_path(".\\u00E9") == 123 + * doc.at_path((const char*)u8".\u00E9") returns an error (NO_SUCH_FIELD) + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Consumes the document and returns a string_view instance corresponding to the + * document as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; +protected: + /** + * Consumes the document. 
+ */ + simdjson_inline error_code consume() noexcept; + + simdjson_inline document(ondemand::json_iterator &&iter) noexcept; + simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; + + simdjson_inline value_iterator resume_value_iterator() noexcept; + simdjson_inline value_iterator get_root_value_iterator() noexcept; + simdjson_inline simdjson_result start_or_resume_object() noexcept; + static simdjson_inline document start(ondemand::json_iterator &&iter) noexcept; + + // + // Fields + // + json_iterator iter{}; ///< Current position in the document + static constexpr depth_t DOCUMENT_DEPTH = 0; ///< document depth is always 0 + + friend class array_iterator; + friend class value; + friend class ondemand::parser; + friend class object; + friend class array; + friend class field; + friend class token; + friend class document_stream; + friend class document_reference; +}; + + +/** + * A document_reference is a thin wrapper around a document reference instance. + */ +class document_reference { +public: + simdjson_inline document_reference() noexcept; + simdjson_inline document_reference(document &d) noexcept; + simdjson_inline document_reference(const document_reference &other) noexcept = default; + simdjson_inline document_reference& operator=(const document_reference &other) noexcept = default; + simdjson_inline void rewind() noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code 
get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + + simdjson_inline simdjson_result is_null() noexcept; + template simdjson_inline simdjson_result get() & noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + simdjson_inline operator document&() const noexcept; +#if SIMDJSON_EXCEPTIONS + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator array() & noexcept(false); + simdjson_inline operator object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + + simdjson_inline simdjson_result type() noexcept; + simdjson_inline 
simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + +private: + document *doc{nullptr}; +}; +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::document &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() 
noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() & noexcept; + template simdjson_inline simdjson_result get() && noexcept; + + template simdjson_inline error_code get(T &out) & noexcept; + template simdjson_inline error_code get(T &out) && noexcept; +#if SIMDJSON_EXCEPTIONS + template ::value == false>::type> + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator fallback::ondemand::array() & noexcept(false); + simdjson_inline operator fallback::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator fallback::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator fallback::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result 
is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool at_end() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + + +} // namespace simdjson + + + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::document_reference value, error_code error) noexcept; + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + 
simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; +#if SIMDJSON_EXCEPTIONS + template ::value == false>::type> + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator fallback::ondemand::array() & noexcept(false); + simdjson_inline operator fallback::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator fallback::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator fallback::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + 
simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H +/* end file simdjson/generic/ondemand/document.h for fallback */ +/* including simdjson/generic/ondemand/document_stream.h for fallback: #include "simdjson/generic/ondemand/document_stream.h" */ +/* begin file simdjson/generic/ondemand/document_stream.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#ifdef SIMDJSON_THREADS_ENABLED +#include +#include +#include +#endif + +namespace simdjson { +namespace fallback { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED +/** @private Custom worker class **/ +struct stage1_worker { + stage1_worker() noexcept = default; + stage1_worker(const stage1_worker&) = delete; + stage1_worker(stage1_worker&&) = delete; + stage1_worker operator=(const stage1_worker&) = delete; + ~stage1_worker(); + /** 
+ * We only start the thread when it is needed, not at object construction, this may throw. + * You should only call this once. + **/ + void start_thread(); + /** + * Start a stage 1 job. You should first call 'run', then 'finish'. + * You must call start_thread once before. + */ + void run(document_stream * ds, parser * stage1, size_t next_batch_start); + /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ + void finish(); + +private: + + /** + * Normally, we would never stop the thread. But we do in the destructor. + * This function is only safe assuming that you are not waiting for results. You + * should have called run, then finish, and be done. + **/ + void stop_thread(); + + std::thread thread{}; + /** These three variables define the work done by the thread. **/ + ondemand::parser * stage1_thread_parser{}; + size_t _next_batch_start{}; + document_stream * owner{}; + /** + * We have two state variables. This could be streamlined to one variable in the future but + * we use two for clarity. + */ + bool has_work{false}; + bool can_work{true}; + + /** + * We lock using a mutex. + */ + std::mutex locking_mutex{}; + std::condition_variable cond_var{}; + + friend class document_stream; +}; +#endif // SIMDJSON_THREADS_ENABLED + +/** + * A forward-only stream of documents. + * + * Produced by parser::iterate_many. + * + */ +class document_stream { +public: + /** + * Construct an uninitialized document_stream. + * + * ```c++ + * document_stream docs; + * auto error = parser.iterate_many(json).get(docs); + * ``` + */ + simdjson_inline document_stream() noexcept; + /** Move one document_stream to another. */ + simdjson_inline document_stream(document_stream &&other) noexcept = default; + /** Move one document_stream to another. */ + simdjson_inline document_stream &operator=(document_stream &&other) noexcept = default; + + simdjson_inline ~document_stream() noexcept; + + /** + * Returns the input size in bytes. 
+ */ + inline size_t size_in_bytes() const noexcept; + + /** + * After iterating through the stream, this method + * returns the number of bytes that were not parsed at the end + * of the stream. If truncated_bytes() differs from zero, + * then the input was truncated maybe because incomplete JSON + * documents were found at the end of the stream. You + * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). + * + * You should only call truncated_bytes() after streaming through all + * documents, like so: + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto doc : stream) { + * // do something with doc + * } + * size_t truncated = stream.truncated_bytes(); + * + */ + inline size_t truncated_bytes() const noexcept; + + class iterator { + public: + using value_type = simdjson_result; + using reference = simdjson_result; + using pointer = void; + using difference_type = std::ptrdiff_t; + using iterator_category = std::input_iterator_tag; + + /** + * Default constructor. + */ + simdjson_inline iterator() noexcept; + /** + * Get the current document (or error). + */ + simdjson_inline reference operator*() noexcept; + /** + * Advance to the next document (prefix). + */ + inline iterator& operator++() noexcept; + /** + * Check if we're at the end yet. + * @param other the end iterator to compare to. + */ + simdjson_inline bool operator!=(const iterator &other) const noexcept; + /** + * @private + * + * Gives the current index in the input document in bytes. + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * auto doc = *i; + * size_t index = i.current_index(); + * } + * + * This function (current_index()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier.
+ */ + simdjson_inline size_t current_index() const noexcept; + + /** + * @private + * + * Gives a view of the current document at the current position. + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * std::string_view v = i.source(); + * } + * + * The returned string_view instance is simply a map to the (unparsed) + * source string: it may thus include white-space characters and all manner + * of padding. + * + * This function (source()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + * + */ + simdjson_inline std::string_view source() const noexcept; + + /** + * Returns error of the stream (if any). + */ + inline error_code error() const noexcept; + + private: + simdjson_inline iterator(document_stream *s, bool finished) noexcept; + /** The document_stream we're iterating through. */ + document_stream* stream; + /** Whether we're finished or not. */ + bool finished; + + friend class document; + friend class document_stream; + friend class json_iterator; + }; + + /** + * Start iterating the documents in the stream. + */ + simdjson_inline iterator begin() noexcept; + /** + * The end of the stream, for iterator comparison purposes. + */ + simdjson_inline iterator end() noexcept; + +private: + + document_stream &operator=(const document_stream &) = delete; // Disallow copying + document_stream(const document_stream &other) = delete; // Disallow copying + + /** + * Construct a document_stream. Does not allocate or parse anything until the iterator is + * used. 
+ * + * @param parser is a reference to the parser instance used to generate this document_stream + * @param buf is the raw byte buffer we need to process + * @param len is the length of the raw byte buffer in bytes + * @param batch_size is the size of the windows (must be greater than or equal to the largest JSON document) + */ + simdjson_inline document_stream( + ondemand::parser &parser, + const uint8_t *buf, + size_t len, + size_t batch_size, + bool allow_comma_separated + ) noexcept; + + /** + * Parse the first document in the buffer. Used by begin(), to handle allocation and + * initialization. + */ + inline void start() noexcept; + + /** + * Parse the next document found in the buffer previously given to document_stream. + * + * The content should be a valid JSON document encoded as UTF-8. If there is a + * UTF-8 BOM, the parser skips it. + * + * You do NOT need to pre-allocate a parser. This function takes care of + * pre-allocating a capacity defined by the batch_size defined when creating the + * document_stream object. + * + * The function returns simdjson::EMPTY if there is no more data to be parsed. + * + * The function returns simdjson::SUCCESS (as integer = 0) in case of success + * and indicates that the buffer has successfully been parsed to the end. + * Every document it contained has been parsed without error. + * + * The function returns an error code from simdjson/simdjson.h in case of failure + * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth; + * the simdjson::error_message function converts these error codes into a string. + * + * You can also check validity by calling parser.is_valid(). The same parser can + * and should be reused for the other documents in the buffer. + */ + inline void next() noexcept; + + /** Move the json_iterator of the document to the location of the next document in the stream. */ + inline void next_document() noexcept; + + /** Get the next document index.
*/ + inline size_t next_batch_start() const noexcept; + + /** Pass the next batch through stage 1 with the given parser. */ + inline error_code run_stage1(ondemand::parser &p, size_t batch_start) noexcept; + + // Fields + ondemand::parser *parser; + const uint8_t *buf; + size_t len; + size_t batch_size; + bool allow_comma_separated; + /** + * We are going to use just one document instance. The document owns + * the json_iterator. It implies that we only ever pass a reference + * to the document to the users. + */ + document doc{}; + /** The error (or lack thereof) from the current document. */ + error_code error; + size_t batch_start{0}; + size_t doc_index{}; + + #ifdef SIMDJSON_THREADS_ENABLED + /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ + bool use_thread; + + inline void load_from_stage1_thread() noexcept; + + /** Start a thread to run stage 1 on the next batch. */ + inline void start_stage1_thread() noexcept; + + /** Wait for the stage 1 thread to finish and capture the results. */ + inline void finish_stage1_thread() noexcept; + + /** The error returned from the stage 1 thread. */ + error_code stage1_thread_error{UNINITIALIZED}; + /** The thread used to run stage 1 against the next batch in the background. */ + std::unique_ptr worker{new(std::nothrow) stage1_worker()}; + /** + * The parser used to run stage 1 in the background. Will be swapped + * with the regular parser when finished. 
+ */ + ondemand::parser stage1_thread_parser{}; + + friend struct stage1_worker; + #endif // SIMDJSON_THREADS_ENABLED + + friend class parser; + friend class document; + friend class json_iterator; + friend struct simdjson_result; + friend struct internal::simdjson_result_base; +}; // document_stream + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::document_stream &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H +/* end file simdjson/generic/ondemand/document_stream.h for fallback */ +/* including simdjson/generic/ondemand/field.h for fallback: #include "simdjson/generic/ondemand/field.h" */ +/* begin file simdjson/generic/ondemand/field.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * A JSON field (key/value pair) in an object. + * + * Returned from object iteration. + * + * Extends from std::pair so you can use C++ algorithms that rely on pairs. 
+ */ +class field : public std::pair { +public: + /** + * Create a new invalid field. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline field() noexcept; + + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + /** + * Get the key as a raw_json_string. Can be used for direct comparison with + * an unescaped C string: e.g., key() == "test". + */ + simdjson_inline raw_json_string key() const noexcept; + /** + * Get the unprocessed key as a string_view. This includes the quotes and may include + * some spaces after the last quote. + */ + simdjson_inline std::string_view key_raw_json_token() const noexcept; + /** + * Get the key as a string_view. This does not include the quotes and + * the string is unprocessed key so it may contain escape characters + * (e.g., \uXXXX or \n). Use unescaped_key() to get the unescaped key. + */ + simdjson_inline std::string_view escaped_key() const noexcept; + /** + * Get the field value. 
+ */ + simdjson_inline ondemand::value &value() & noexcept; + /** + * @overload ondemand::value &ondemand::value() & noexcept + */ + simdjson_inline ondemand::value value() && noexcept; + +protected: + simdjson_inline field(raw_json_string key, ondemand::value &&value) noexcept; + static simdjson_inline simdjson_result start(value_iterator &parent_iter) noexcept; + static simdjson_inline simdjson_result start(const value_iterator &parent_iter, raw_json_string key) noexcept; + friend struct simdjson_result; + friend class object_iterator; +}; + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::field &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result key() noexcept; + simdjson_inline simdjson_result key_raw_json_token() noexcept; + simdjson_inline simdjson_result escaped_key() noexcept; + simdjson_inline simdjson_result value() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_H +/* end file simdjson/generic/ondemand/field.h for fallback */ +/* including simdjson/generic/ondemand/object.h for fallback: #include "simdjson/generic/ondemand/object.h" */ +/* begin file simdjson/generic/ondemand/object.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * A forward-only JSON object field iterator. + */ +class object { +public: + /** + * Create a new invalid object. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline object() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. 
+ * + * You are expected to access keys only once. You should access the value corresponding to a + * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. + * + * If you expect to have keys with escape characters, please review our documentation. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field was not there when they are not in order). + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. 
The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. + * + * If you expect to have keys with escape characters, please review our documentation. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. 
+ * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an object + * instance: there is no rewind and no invalidation. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that are trivially convertible to JSON Pointer queries: key + * names and array indices.
+ * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Reset the iterator so that we are pointing back at the + * beginning of the object. You should still consume values only once even if you + * can iterate through the object more than once. If you unescape a string within + * the object more than once, you have unsafe code. Note that rewinding an object + * means that you may need to reparse it anew: it is not a free operation. + * + * @returns true if the object contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * This method scans the beginning of the object and checks whether the + * object is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + inline simdjson_result is_empty() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. 
+ * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Consumes the object and returns a string_view instance corresponding to the + * object as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + +protected: + /** + * Go to the end of the object, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + static simdjson_inline object resume(const value_iterator &iter) noexcept; + simdjson_inline object(const value_iterator &iter) noexcept; + + simdjson_warn_unused simdjson_inline error_code find_field_raw(const std::string_view key) noexcept; + + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::object &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + 
simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + + inline simdjson_result reset() noexcept; + inline simdjson_result is_empty() noexcept; + inline simdjson_result count_fields() & noexcept; + inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_H +/* end file simdjson/generic/ondemand/object.h for fallback */ +/* including simdjson/generic/ondemand/object_iterator.h for fallback: #include "simdjson/generic/ondemand/object_iterator.h" */ +/* begin file simdjson/generic/ondemand/object_iterator.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +class object_iterator { +public: + /** + * Create a new invalid object_iterator. + * + * Exists so you can declare a variable and later assign to it before use. 
+ */ + simdjson_inline object_iterator() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result operator*() noexcept; + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const object_iterator &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. + simdjson_inline bool operator!=(const object_iterator &) const noexcept; + // Checks for '}' and ',' + simdjson_inline object_iterator &operator++() noexcept; + +private: + /** + * The underlying JSON iterator. + * + * PERF NOTE: expected to be elided in favor of the parent document: this is set when the object + * is first used, and never changes afterwards. + */ + value_iterator iter{}; + + simdjson_inline object_iterator(const value_iterator &iter) noexcept; + friend struct simdjson_result; + friend class object; +}; + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::object_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth.
+ simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + // Checks for '}' and ',' + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H +/* end file simdjson/generic/ondemand/object_iterator.h for fallback */ +/* including simdjson/generic/ondemand/serialization.h for fallback: #include "simdjson/generic/ondemand/serialization.h" */ +/* begin file simdjson/generic/ondemand/serialization.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Create a string-view instance out of a document instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(fallback::ondemand::document& x) noexcept; +/** + * Create a string-view instance out of a value instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. The value must + * not have been accessed previously. It does not + * validate the content. + */ +inline simdjson_result to_json_string(fallback::ondemand::value& x) noexcept; +/** + * Create a string-view instance out of an object instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(fallback::ondemand::object& x) noexcept; +/** + * Create a string-view instance out of an array instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again.
It does not + * validate the content. + */ +inline simdjson_result to_json_string(fallback::ondemand::array& x) noexcept; +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +} // namespace simdjson + +/** + * We want to support argument-dependent lookup (ADL). + * Hence we should define operator<< in the namespace + * where the argument (here value, object, etc.) resides. + * Credit: @madhur4127 + * See https://github.com/simdjson/simdjson/issues/1768 + */ +namespace simdjson { namespace fallback { namespace ondemand { + +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param x The value. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::value x); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::array value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The document. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document_reference& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The object. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::object value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +}}} // namespace simdjson::fallback::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H +/* end file simdjson/generic/ondemand/serialization.h for fallback */ + +// Inline definitions +/* including simdjson/generic/ondemand/array-inl.h for fallback: #include "simdjson/generic/ondemand/array-inl.h" */ +/* begin file simdjson/generic/ondemand/array-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped 
(editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +// +// ## Live States +// +// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the array is first found and the iterator is just past the `[`. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the `,` before the next value (or `]`). In this state, +// depth == iter->depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter->depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the array iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an +// array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter->depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter->depth == depth, and at_start == false. +// +// ## Terminal State +// +// The terminal state has iter->depth < depth.
at_start is always false. +// +// - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this +// by decrementing depth. In this state, iter->depth < depth, at_start == false, and +// error == SUCCESS. +// + +simdjson_inline array::array(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { + // We don't need to know if the array is empty to start iteration, but we do want to know if there + // is an error--thus `simdjson_unused`. + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_root_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { + bool has_value; + SIMDJSON_TRY(iter.started_array().get(has_value)); + return array(iter); +} + +simdjson_inline simdjson_result array::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return array_iterator(iter); +} +simdjson_inline simdjson_result array::end() noexcept { + return array_iterator(iter); +} +simdjson_inline error_code array::consume() noexcept { + auto error = iter.json_iter().skip_child(iter.depth()-1); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result array::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. 
+ const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline simdjson_result array::count_elements() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the array after counting the number of elements. + iter.reset_array(); + return count; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline simdjson_result array::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_array().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +inline simdjson_result array::reset() & noexcept { + return iter.reset_array(); +} + +inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + // - means "the append position" or "the element after the end of the array" + // We don't support this, because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. 
If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + // Get the child + auto child = at(array_index); + // If there is an error, it ends here + if(child.error()) { + return child; + } + + // If there is a /, we're not done yet, call recursively. + if (i < json_pointer.length()) { + child = child.at_pointer(json_pointer.substr(i)); + } + return child; +} + +inline std::string json_path_to_pointer_conversion(std::string_view json_path) { + if (json_path.empty() || (json_path.front() != '.' && + json_path.front() != '[')) { + return "-1"; // This is just a sentinel value, the caller should check for this and return an error. + } + + std::string result; + // Reserve space to reduce allocations, adjusting for potential increases due + // to escaping. + result.reserve(json_path.size() * 2); + + size_t i = 0; + + while (i < json_path.length()) { + if (json_path[i] == '.') { + result += '/'; + } else if (json_path[i] == '[') { + result += '/'; + ++i; // Move past the '[' + while (i < json_path.length() && json_path[i] != ']') { + if (json_path[i] == '~') { + result += "~0"; + } else if (json_path[i] == '/') { + result += "~1"; + } else { + result += json_path[i]; + } + ++i; + } + if (i == json_path.length() || json_path[i] != ']') { + return "-1"; // Using sentinel value that will be handled as an error by the caller. 
+ } + } else { + if (json_path[i] == '~') { + result += "~0"; + } else if (json_path[i] == '/') { + result += "~1"; + } else { + result += json_path[i]; + } + } + ++i; + } + + return result; +} + +inline simdjson_result array::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { return INVALID_JSON_POINTER; } + return at_pointer(json_pointer); +} + +simdjson_inline simdjson_result array::at(size_t index) noexcept { + size_t i = 0; + for (auto value : *this) { + if (i == index) { return value; } + i++; + } + return INDEX_OUT_OF_BOUNDS; +} + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + fallback::ondemand::array &&value +) noexcept + : implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept + : implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { + if (error()) { return error(); } + return first.is_empty(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view 
json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H +/* end file simdjson/generic/ondemand/array-inl.h for fallback */ +/* including simdjson/generic/ondemand/array_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/array_iterator-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result array_iterator::operator*() noexcept { + if (iter.error()) { iter.abandon(); return iter.error(); } + return value(iter.child()); +} +simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { + return iter.is_open(); +} +simdjson_inline array_iterator &array_iterator::operator++() noexcept { + error_code error; + // PERF NOTE this is a safety 
rail ... users should exit loops as soon as they receive an error, so we'll never get here. + // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. + if (( error = iter.error() )) { return *this; } + if (( error = iter.skip_child() )) { return *this; } + if (( error = iter.has_next_element().error() )) { return *this; } + return *this; +} + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + fallback::ondemand::array_iterator &&value +) noexcept + : fallback::implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : fallback::implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++(first); + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/array_iterator-inl.h for fallback */ +/* including simdjson/generic/ondemand/document-inl.h for fallback: #include "simdjson/generic/ondemand/document-inl.h" */ +/* begin file simdjson/generic/ondemand/document-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H + +/* amalgamation 
skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept + : iter{std::forward(_iter)} +{ + logger::log_start_value(iter, "document"); +} + +simdjson_inline document document::start(json_iterator &&iter) noexcept { + return document(std::forward(iter)); +} + +inline void document::rewind() noexcept { + iter.rewind(); +} + +inline std::string document::to_debug_string() noexcept { + return iter.to_string(); +} + +inline simdjson_result document::current_location() const noexcept { + return 
iter.current_location(); +} + +inline int32_t document::current_depth() const noexcept { + return iter.depth(); +} + +inline bool document::at_end() const noexcept { + return iter.at_end(); +} + + +inline bool document::is_alive() noexcept { + return iter.is_alive(); +} +simdjson_inline value_iterator document::resume_value_iterator() noexcept { + return value_iterator(&iter, 1, iter.root_position()); +} +simdjson_inline value_iterator document::get_root_value_iterator() noexcept { + return resume_value_iterator(); +} +simdjson_inline simdjson_result document::start_or_resume_object() noexcept { + if (iter.at_root()) { + return get_object(); + } else { + return object::resume(resume_value_iterator()); + } +} +simdjson_inline simdjson_result document::get_value() noexcept { + // Make sure we start any arrays or objects before returning, so that start_root_() + // gets called. + + // It is the convention throughout the code that the macro `SIMDJSON_DEVELOPMENT_CHECKS` determines whether + // we check for OUT_OF_ORDER_ITERATION. Proper on::demand code should never trigger this error. +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.at_root()) { return OUT_OF_ORDER_ITERATION; } +#endif + // assert_at_root() serves two purposes: in Debug mode, whether or not + // SIMDJSON_DEVELOPMENT_CHECKS is set or not, it checks that we are at the root of + // the document (this will typically be redundant). In release mode, it generates + // SIMDJSON_ASSUME statements to allow the compiler to make assumptions. + iter.assert_at_root(); + switch (*iter.peek()) { + case '[': { + // The following lines check that the document ends with ]. + auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_array(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + case '{': { + // The following lines would check that the document ends with }. 
+ auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_object(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + default: + // Unfortunately, scalar documents are a special case in simdjson and they cannot + // be safely converted to value instances. + return SCALAR_DOCUMENT_AS_VALUE; + } +} +simdjson_inline simdjson_result document::get_array() & noexcept { + auto value = get_root_value_iterator(); + return array::start_root(value); +} +simdjson_inline simdjson_result document::get_object() & noexcept { + auto value = get_root_value_iterator(); + return object::start_root(value); +} + +/** + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. We want to disallow trailing + * content. + * Thus, in several implementations below, we pass 'true' to the get_root_*() accessors of + * the iterator returned by get_root_value_iterator(): this indicates that we disallow trailing content.
+ */ + +simdjson_inline simdjson_result document::get_uint64() noexcept { + return get_root_value_iterator().get_root_uint64(true); +} +simdjson_inline simdjson_result document::get_uint64_in_string() noexcept { + return get_root_value_iterator().get_root_uint64_in_string(true); +} +simdjson_inline simdjson_result document::get_int64() noexcept { + return get_root_value_iterator().get_root_int64(true); +} +simdjson_inline simdjson_result document::get_int64_in_string() noexcept { + return get_root_value_iterator().get_root_int64_in_string(true); +} +simdjson_inline simdjson_result document::get_double() noexcept { + return get_root_value_iterator().get_root_double(true); +} +simdjson_inline simdjson_result document::get_double_in_string() noexcept { + return get_root_value_iterator().get_root_double_in_string(true); +} +simdjson_inline simdjson_result document::get_string(bool allow_replacement) noexcept { + return get_root_value_iterator().get_root_string(true, allow_replacement); +} +template +simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { + return get_root_value_iterator().get_root_string(receiver, true, allow_replacement); +} +simdjson_inline simdjson_result document::get_wobbly_string() noexcept { + return get_root_value_iterator().get_root_wobbly_string(true); +} +simdjson_inline simdjson_result document::get_raw_json_string() noexcept { + return get_root_value_iterator().get_root_raw_json_string(true); +} +simdjson_inline simdjson_result document::get_bool() noexcept { + return get_root_value_iterator().get_root_bool(true); +} +simdjson_inline simdjson_result document::is_null() noexcept { + return get_root_value_iterator().is_root_null(true); +} + +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document::get() & 
noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } + +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } + +template simdjson_inline error_code document::get(T &out) & noexcept { + return get().get(out); +} +template simdjson_inline error_code document::get(T &out) && noexcept { + return std::forward(*this).get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline document::operator T() noexcept(false) { return get(); } +simdjson_inline document::operator array() & noexcept(false) { return get_array(); } +simdjson_inline document::operator object() & noexcept(false) { return get_object(); } +simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } 
+simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document::operator double() noexcept(false) { return get_double(); } +simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document::operator value() noexcept(false) { return get_value(); } + +#endif +simdjson_inline simdjson_result document::count_elements() & noexcept { + auto a = get_array(); + simdjson_result answer = a.count_elements(); + /* If there was an array, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::count_fields() & noexcept { + auto a = get_object(); + simdjson_result answer = a.count_fields(); + /* If there was an object, we are now left pointing at its first element. 
*/ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::at(size_t index) & noexcept { + auto a = get_array(); + return a.at(index); +} +simdjson_inline simdjson_result document::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result document::end() & noexcept { + return {}; +} + +simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field(const char *key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline error_code document::consume() noexcept { + auto error = iter.skip_child(0); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result document::raw_json() noexcept { + auto _iter = get_root_value_iterator(); + const uint8_t * starting_point{_iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. 
+ const uint8_t * final_point{iter.unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result document::type() noexcept { + return get_root_value_iterator().type(); +} + +simdjson_inline simdjson_result document::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline simdjson_result document::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + +simdjson_inline bool document::is_negative() noexcept { + return get_root_value_iterator().is_root_negative(); +} + +simdjson_inline simdjson_result document::is_integer() noexcept { + return get_root_value_iterator().is_root_integer(true); +} + +simdjson_inline simdjson_result document::get_number_type() noexcept { + return get_root_value_iterator().get_root_number_type(true); +} + +simdjson_inline simdjson_result document::get_number() noexcept { + return get_root_value_iterator().get_root_number(true); +} + + +simdjson_inline simdjson_result document::raw_json_token() noexcept { + auto _iter = get_root_value_iterator(); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); +} + +simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_pointer.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result 
document::at_path(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_path is called + if (json_path.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + fallback::ondemand::document &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base( + error + ) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return
first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return 
first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first); +} +template<> simdjson_inline error_code simdjson_result::get(fallback::ondemand::document &out) & noexcept = delete; +template<> simdjson_inline error_code 
simdjson_result::get(fallback::ondemand::document &out) && noexcept { + if (error()) { return error(); } + out = std::forward(first); + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} + +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} + +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} + +simdjson_inline bool simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} + +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} + +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} + +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} + + +#if SIMDJSON_EXCEPTIONS +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() 
noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline bool simdjson_result::at_end() const noexcept { + if (error()) { return error(); } + return first.at_end(); +} + + +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} + +} // namespace simdjson + + +namespace simdjson { +namespace fallback { +namespace ondemand { + +simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} +simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} +simdjson_inline void document_reference::rewind() noexcept { 
doc->rewind(); } +simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } +simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } +/** + * The document_reference instances are used primarily/solely for streams of JSON + * documents. + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. + * + * However, for streams of JSON documents, we want to be able to start from + * "321" "321" "321" + * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() + * successfully each time. + * + * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: + * this indicates that we allow trailing content. + */ +simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } +simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } +simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } +template +simdjson_inline error_code document_reference::get_string(string_type& 
receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } +simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } +simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } +simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } +simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } +simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_value(); } +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline document_reference::operator T() noexcept(false) { return get(); } +simdjson_inline document_reference::operator array() & noexcept(false) { return 
array(*doc); } +simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } +simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } +simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } +#endif +simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } +simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } +simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } +simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } +simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } +simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } 
+simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } +simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } +simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } +simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } +simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } +simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } +simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } +simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } +simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } +simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } +simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} +simdjson_inline document_reference::operator document&() const noexcept { return *doc; } + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + + + +namespace simdjson { +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::document_reference value, error_code 
error) + noexcept : implementation_simdjson_result_base(std::forward(value), error) {} + + +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } 
+ return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + 
if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline 
simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H +/* end file simdjson/generic/ondemand/document-inl.h for fallback */ +/* including simdjson/generic/ondemand/document_stream-inl.h for fallback: #include "simdjson/generic/ondemand/document_stream-inl.h" */ +/* begin file simdjson/generic/ondemand/document_stream-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define 
SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace fallback { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. + std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); +} + +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); +} + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). 
+ if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); +} + + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } +} + +inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); +} + +#endif // SIMDJSON_THREADS_ENABLED + +simdjson_inline document_stream::document_stream( + ondemand::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size, + bool _allow_comma_separated +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? 
MINIMAL_BATCH_SIZE : _batch_size}, + allow_comma_separated{_allow_comma_separated}, + error{SUCCESS} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change + #endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif +} + +simdjson_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + allow_comma_separated{false}, + error{UNINITIALIZED} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) + #endif +{ +} + +simdjson_inline document_stream::~document_stream() noexcept +{ + #ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); + #endif +} + +inline size_t document_stream::size_in_bytes() const noexcept { + return len; +} + +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +} + +simdjson_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { +} + +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { +} + +simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { + //if(stream->error) { return stream->error; } + return simdjson_result(stream->doc, stream->error); +} + +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) 
+ // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. + // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->next() is guarded against error conditions + // (it will immediately return if stream->error casts to true). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; +} + +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; +} + +simdjson_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished.
+ return iterator(this, error == EMPTY); +} + +simdjson_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); +} + +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->allocate(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } + doc_index = batch_start; + doc = document(json_iterator(&buf[batch_start], parser)); + doc.iter._streaming = true; + + #ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread on next batch if needed + error = stage1_thread_parser.allocate(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } + #endif // SIMDJSON_THREADS_ENABLED +} + +inline void document_stream::next() noexcept { + // We always exit at once, once in an error condition. + if (error) { return; } + next_document(); + if (error) { return; } + auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); + doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; + + // Check if at end of structural indexes (i.e.
at end of batch) + if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { + error = EMPTY; + // Load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + #ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } + #else + error = run_stage1(*parser, batch_start); + #endif + /** + * Whenever we move to another window, we need to update all pointers to make + * it appear as if the input buffer started at the beginning of the window. + * + * Take this input: + * + * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] + * + * Say you process the following window... + * + * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' + * + * When you do so, the json_iterator has a pointer at the beginning of the memory region + * (pointing at the beginning of '{"z"...'). + * + * When you move to the window that starts at... + * + * '[7, 10, 9] [15, 11, 12, 13] ...' + * + * then it is not sufficient to just run stage 1. You also need to re-anchor the + * json_iterator so that it believes we are starting at '[7, 10, 9]...'. + * + * Under the DOM front-end, this gets done automatically because the parser owns + * the pointer to the data, and when you call stage1 and then stage2 on the same + * parser, then stage2 will run on the pointer acquired by stage1. + * + * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that + * we used. But json_iterator has no callback when stage1 is called on the parser. + * In fact, I think that the parser is unaware of json_iterator. + * + * + * So we need to re-anchor the json_iterator after each call to stage 1 so that + * all of the pointers are in sync. + */ + doc.iter = json_iterator(&buf[batch_start], parser); + doc.iter._streaming = true; + /** + * End of resync.
+ */ + + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. + doc_index = batch_start; + } + } +} + +inline void document_stream::next_document() noexcept { + // Go to next place where depth=0 (document depth) + error = doc.iter.skip_child(0); + if (error) { return; } + // Always set depth=1 at the start of document + doc.iter._depth = 1; + // consume comma if comma separated is allowed + if (allow_comma_separated) { doc.iter.consume_character(','); } + // Resets the string buffer at the beginning, thus invalidating the strings. + doc.iter._string_buf_loc = parser->string_buf.get(); + doc.iter._root = doc.iter.position(); +} + +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; +} + +inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { + // This code only updates the structural index in the parser, it does not update any json_iterator + // instance. 
+ size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); + } +} + +simdjson_inline size_t document_stream::iterator::current_index() const noexcept { + return stream->doc_index; +} + +simdjson_inline std::string_view document_stream::iterator::source() const noexcept { + auto depth = stream->doc.iter.depth(); + auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); + + // If at root, process the first token to determine if scalar value + if (stream->doc.iter.at_root()) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': // Depth=1 already at start of document + break; + case '}': case ']': + depth--; + break; + default: // Scalar value document + // TODO: We could remove trailing whitespaces + // This returns a string spanning from start of value to the beginning of the next document (excluded) + { + auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; + // normally the length would be next_index - current_index() - 1, except for the last document + size_t svlen = next_index - current_index(); + const char *start = reinterpret_cast(stream->buf) + current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { + svlen--; + } + return std::string_view(start, svlen); + } + } + cur_struct_index++; + } + + while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': + depth++; + break; + case '}': case ']': + depth--; + break; + } + if (depth == 0) { break; } + 
cur_struct_index++; + } + + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; +} + +inline error_code document_stream::iterator::error() const noexcept { + return stream->error; +} + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(stage1_thread_parser,*parser); + error = stage1_thread_error; + if (error) { return; } + + // If there's anything left, start the stage 1 thread! + if (next_batch_start() < len) { + start_stage1_thread(); + } +} + +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. 
+ this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); + + worker->run(this, & this->stage1_thread_parser, _next_batch_start); +} + +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result::simdjson_result( + fallback::ondemand::document_stream &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} + +} + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H +/* end file simdjson/generic/ondemand/document_stream-inl.h for fallback */ +/* including simdjson/generic/ondemand/field-inl.h for fallback: #include "simdjson/generic/ondemand/field-inl.h" */ +/* begin file simdjson/generic/ondemand/field-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +// clang 6 does not think the default constructor can be noexcept, so we make it explicit +simdjson_inline field::field() noexcept : std::pair() {} + +simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept + : 
std::pair(key, std::forward(value)) +{ +} + +simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { + raw_json_string key; + SIMDJSON_TRY( parent_iter.field_key().get(key) ); + SIMDJSON_TRY( parent_iter.field_value() ); + return field::start(parent_iter, key); +} + +simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { + return field(key, parent_iter.child()); +} + +simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); + first.consume(); + return answer; +} + +simdjson_inline raw_json_string field::key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + return first; +} + + +simdjson_inline std::string_view field::key_raw_json_token() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + return std::string_view(reinterpret_cast(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1); +} + +simdjson_inline std::string_view field::escaped_key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us.
+ auto end_quote = second.iter._json_iter->token.peek(-1); + while(*end_quote != '"') end_quote--; + return std::string_view(reinterpret_cast(first.buf), end_quote - first.buf); +} + +simdjson_inline value &field::value() & noexcept { + return second; +} + +simdjson_inline value field::value() && noexcept { + return std::forward(*this).second; +} + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + fallback::ondemand::field &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::key() noexcept { + if (error()) { return error(); } + return first.key(); +} + +simdjson_inline simdjson_result simdjson_result::key_raw_json_token() noexcept { + if (error()) { return error(); } + return first.key_raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::escaped_key() noexcept { + if (error()) { return error(); } + return first.escaped_key(); +} + +simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(allow_replacement); +} + +simdjson_inline simdjson_result simdjson_result::value() noexcept { + if (error()) { return error(); } + return std::move(first.value()); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +/* end file simdjson/generic/ondemand/field-inl.h for fallback */ +/* including simdjson/generic/ondemand/json_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/json_iterator-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef 
SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept + : token(std::forward(other.token)), + parser{other.parser}, + _string_buf_loc{other._string_buf_loc}, + error{other.error}, + _depth{other._depth}, + _root{other._root}, + _streaming{other._streaming} +{ + other.parser = nullptr; +} +simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { + token = other.token; + parser = other.parser; + _string_buf_loc = other._string_buf_loc; + error = other.error; + _depth = other._depth; + _root = other._root; + _streaming = other._streaming; + other.parser = nullptr; + return *this; +} + +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + 
_streaming{false} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} + +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{streaming} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + +inline void json_iterator::rewind() noexcept { + token.set_position( root_position() ); + logger::log_headers(); // We start again + _string_buf_loc = parser->string_buf.get(); + _depth = 1; +} + +inline bool json_iterator::balanced() const noexcept { + token_iterator ti(token); + int32_t count{0}; + ti.set_position( root_position() ); + while(ti.peek() <= peek_last()) { + switch (*ti.return_current_and_advance()) + { + case '[': case '{': + count++; + break; + case ']': case '}': + count--; + break; + default: + break; + } + } + return count == 0; +} + + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and parent_depth, which is a desired effect. The warning does not show up if the +// skip_child() function is not marked inline). 
+SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { + if (depth() <= parent_depth) { return SUCCESS; } + switch (*return_current_and_advance()) { + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + + // For the first open array/object in a value, we've already incremented depth, so keep it the same + // We never stop at colon, but if we did, it wouldn't affect depth + case '[': case '{': case ':': + logger::log_start_value(*this, "skip"); + break; + // If there is a comma, we have just finished a value in an array/object, and need to get back in + case ',': + logger::log_value(*this, "skip"); + break; + // ] or } means we just finished a value and need to jump out of the array/object + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } +#if SIMDJSON_CHECK_EOF + // If there are no more tokens, the parent is incomplete. + if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + break; + case '"': + if(*peek() == ':') { + // We are at a key!!! + // This might happen if you just started an object and you skip it immediately. + // Performance note: it would be nice to get rid of this check as it is somewhat + // expensive. + // https://github.com/simdjson/simdjson/issues/1742 + logger::log_value(*this, "key"); + return_current_and_advance(); // eat up the ':' + break; // important!!! 
+ } + simdjson_fallthrough; + // Anything else must be a scalar value + default: + // For the first scalar, we will have incremented depth already, so we decrement it here. + logger::log_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + } + + // Now that we've considered the first value, we only increment/decrement for arrays/objects + while (position() < end_position()) { + switch (*return_current_and_advance()) { + case '[': case '{': + logger::log_start_value(*this, "skip"); + _depth++; + break; + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + default: + logger::log_value(*this, "skip", ""); + break; + } + } + + return report_error(TAPE_ERROR, "not enough close braces"); +} + +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool json_iterator::at_root() const noexcept { + return position() == root_position(); +} + +simdjson_inline bool json_iterator::is_single_token() const noexcept { + return parser->implementation->n_structural_indexes == 1; +} + +simdjson_inline bool json_iterator::streaming() const noexcept { + return _streaming; +} + +simdjson_inline token_position json_iterator::root_position() const noexcept { + return _root; +} + +simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +} + +simdjson_inline void json_iterator::assert_at_root() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + // Under Visual Studio, the next 
SIMDJSON_ASSUME fails with: the argument + // has side effects that will be discarded. + SIMDJSON_ASSUME( token.position() == _root ); +#endif +} + +simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { + assert_valid_position(token._position + required_tokens - 1); +} + +simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); + SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#endif +} + +simdjson_inline bool json_iterator::at_end() const noexcept { + return position() == end_position(); +} +simdjson_inline token_position json_iterator::end_position() const noexcept { + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + return &parser->implementation->structural_indexes[n_structural_indexes]; +} + +inline std::string json_iterator::to_string() const noexcept { + if( !is_alive() ) { return "dead json_iterator instance"; } + const char * current_structural = reinterpret_cast(token.peek()); + return std::string("json_iterator [ depth : ") + std::to_string(_depth) + + std::string(", structural : '") + std::string(current_structural,1) + + std::string("', offset : ") + std::to_string(token.current_offset()) + + std::string("', error : ") + error_message(error) + + std::string(" ]"); +} + +inline simdjson_result json_iterator::current_location() const noexcept { + if (!is_alive()) { // Unrecoverable error + if (!at_root()) { + return reinterpret_cast(token.peek(-1)); + } else { + return reinterpret_cast(token.peek()); + } + } + if (at_end()) { + return OUT_OF_BOUNDS; + } + return reinterpret_cast(token.peek()); +} + +simdjson_inline bool json_iterator::is_alive() const noexcept { + return parser; +} + +simdjson_inline void json_iterator::abandon() noexcept { + parser 
= nullptr; + _depth = 0; +} + +simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif // SIMDJSON_CHECK_EOF + return token.return_current_and_advance(); +} + +simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { + // deliberately done without safety guard: + return token.peek(); +} + +simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // SIMDJSON_CHECK_EOF + return token.peek(delta); +} + +simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // #if SIMDJSON_CHECK_EOF + return token.peek_length(delta); +} + +simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { + // todo: currently we require end-of-string buffering, but the following + // assert_valid_position should be turned on if/when we lift that condition. + // assert_valid_position(position); + // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. + return token.peek(position); +} + +simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_length(position); +} +simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_root_length(position); +} + +simdjson_inline token_position json_iterator::last_position() const noexcept { + // The following line fails under some compilers... 
+ // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); + // since it has side-effects. + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + SIMDJSON_ASSUME(n_structural_indexes > 0); + return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +} +simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { + return token.peek(last_position()); +} + +simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { + SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); + SIMDJSON_ASSUME(_depth == parent_depth + 1); + _depth = parent_depth; +} + +simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); + _depth = child_depth; +} + +simdjson_inline depth_t json_iterator::depth() const noexcept { + return _depth; +} + +simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { + return _string_buf_loc; +} + +simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); + logger::log_error(*this, message); + error = _error; + return error; +} + +simdjson_inline token_position json_iterator::position() const noexcept { + return token.position(); +} + +simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { + return parser->unescape(in, _string_buf_loc, allow_replacement); +} + +simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { + return parser->unescape_wobbly(in, _string_buf_loc); +} + +simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + 
SIMDJSON_ASSUME(_depth == child_depth - 1); +#if SIMDJSON_DEVELOPMENT_CHECKS +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); + SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); +#endif +#endif + token.set_position(position); + _depth = child_depth; +} + +simdjson_inline error_code json_iterator::consume_character(char c) noexcept { + if (*peek() == c) { + return_current_and_advance(); + return SUCCESS; + } + return TAPE_ERROR; +} + +#if SIMDJSON_DEVELOPMENT_CHECKS + +simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; +} + +simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } +} + +#endif + + +simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); + logger::log_error(*this, message); + return _error; +} + + +simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { + // This function is not expected to be called in performance-sensitive settings. + // Let us guard against silly cases: + if((N < max_len) || (N == 0)) { return false; } + // Copy to the buffer. + std::memcpy(tmpbuf, json, max_len); + if(N > max_len) { // We pad whatever remains with ' '. 
+ std::memset(tmpbuf + max_len, ' ', N - max_len); + } + return true; +} + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/json_iterator-inl.h for fallback */ +/* including simdjson/generic/ondemand/json_type-inl.h for fallback: #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* begin file simdjson/generic/ondemand/json_type-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { + switch (type) { + case json_type::array: out << "array"; break; + case json_type::object: out << "object"; break; + case json_type::number: out << "number"; break; + case json_type::string: out << "string"; break; + case json_type::boolean: out << "boolean"; break; + case json_type::null: out << "null"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& 
operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { + return out << type.value(); +} +#endif + + + +simdjson_inline number_type number::get_number_type() const noexcept { + return type; +} + +simdjson_inline bool number::is_uint64() const noexcept { + return get_number_type() == number_type::unsigned_integer; +} + +simdjson_inline uint64_t number::get_uint64() const noexcept { + return payload.unsigned_integer; +} + +simdjson_inline number::operator uint64_t() const noexcept { + return get_uint64(); +} + +simdjson_inline bool number::is_int64() const noexcept { + return get_number_type() == number_type::signed_integer; +} + +simdjson_inline int64_t number::get_int64() const noexcept { + return payload.signed_integer; +} + +simdjson_inline number::operator int64_t() const noexcept { + return get_int64(); +} + +simdjson_inline bool number::is_double() const noexcept { + return get_number_type() == number_type::floating_point_number; +} + +simdjson_inline double number::get_double() const noexcept { + return payload.floating_point_number; +} + +simdjson_inline number::operator double() const noexcept { + return get_double(); +} + +simdjson_inline double number::as_double() const noexcept { + if(is_double()) { + return payload.floating_point_number; + } + if(is_int64()) { + return double(payload.signed_integer); + } + return double(payload.unsigned_integer); +} + +simdjson_inline void number::append_s64(int64_t value) noexcept { + payload.signed_integer = value; + type = number_type::signed_integer; +} + +simdjson_inline void number::append_u64(uint64_t value) noexcept { + payload.unsigned_integer = value; + type = number_type::unsigned_integer; +} + +simdjson_inline void number::append_double(double value) noexcept { + payload.floating_point_number = value; + type = number_type::floating_point_number; +} + +simdjson_inline void number::skip_double() noexcept { + type = number_type::floating_point_number; +} + +} // namespace ondemand +} // namespace 
fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +/* end file simdjson/generic/ondemand/json_type-inl.h for fallback */ +/* including simdjson/generic/ondemand/logger-inl.h for fallback: #include "simdjson/generic/ondemand/logger-inl.h" */ +/* begin file simdjson/generic/ondemand/logger-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace fallback { +namespace ondemand { +namespace logger { + +static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +static constexpr const int LOG_EVENT_LEN = 20; +static constexpr const int LOG_BUFFER_LEN = 30; +static constexpr const int LOG_SMALL_BUFFER_LEN = 10; +static int log_depth = 0; // Not threadsafe. Log only. 
+ +// Helper to turn unprintable or newline characters into spaces +static inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } +} + +template +static inline std::string string_format(const std::string& format, const Args&... args) +{ + SIMDJSON_PUSH_DISABLE_ALL_WARNINGS + int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; + auto size = static_cast(size_s); + if (size <= 0) return std::string(); + std::unique_ptr buf(new char[size]); + std::snprintf(buf.get(), size, format.c_str(), args...); + SIMDJSON_POP_DISABLE_WARNINGS + return std::string(buf.get(), buf.get() + size - 1); +} + +static inline log_level get_log_level_from_env() +{ + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + char *lvl = getenv("SIMDJSON_LOG_LEVEL"); + SIMDJSON_POP_DISABLE_WARNINGS + if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } + return log_level::info; +} + +static inline log_level log_threshold() +{ + static log_level threshold = get_log_level_from_env(); + return threshold; +} + +static inline bool should_log(log_level level) +{ + return level >= log_threshold(); +} + +inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +} + +inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "", type, detail, log_level::info); +} +inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +} + +inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, 
std::string_view detail) noexcept { + log_line(iter, index, depth, "+", type, detail, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} +inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_line(iter, "+", type, "", delta, depth_delta, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} + +inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + if (LOG_ENABLED) { log_depth--; } + log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +} + +inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); +} +inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { + log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); +} + +inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_event(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_value(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_start_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_end_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_error(iter.json_iter(), error, detail, delta, depth_delta); +} + +inline void 
log_headers() noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(log_level::info))) { + // Technically a static variable is not thread-safe, but if you are using threads and logging... well... + static bool displayed_hint{false}; + log_depth = 0; + printf("\n"); + if (!displayed_hint) { + // We only print this helpful header once. + printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); + printf("# +array says 'this is where we were when we discovered the start array'\n"); + printf( + "# -array says 'this is where we were when we ended the array'\n"); + printf("# skip says 'this is a structural or value I am skipping'\n"); + printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); + printf("#\n"); + printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); + printf("# in addition to the depth being displayed.\n"); + printf("#\n"); + printf("# Every token in the document has a single depth determined by the tokens before it,\n"); + printf("# and is not affected by what the token actually is.\n"); + printf("#\n"); + printf("# Not all structural elements are presented as tokens in the logs.\n"); + printf("#\n"); + printf("# We never give control to the user within an empty array or an empty object.\n"); + printf("#\n"); + printf("# Inside an array, having a depth greater than the array's depth means that\n"); + printf("# we are pointing inside a value.\n"); + printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); + printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); + displayed_hint = true; + } + printf("\n"); + printf("| %-*s ", LOG_EVENT_LEN, "Event"); + printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); + printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); + // printf("| %-*s ", 5, "Next#"); + printf("| %-*s ", 5, "Depth"); + printf("| Detail "); + printf("|\n"); + + printf("|%.*s", 
LOG_EVENT_LEN + 2, DASHES); + printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); + printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); + // printf("|%.*s", 5+2, DASHES); + printf("|%.*s", 5 + 2, DASHES); + printf("|--------"); + printf("|\n"); + fflush(stdout); + } + } +} + +template +inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { + log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); +} + +template +inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(level))) { + const int indent = depth * 2; + const auto buf = iter.token.buf; + auto msg = string_format(title, std::forward(args)...); + printf("| %*s%s%-*s ", indent, "", title_prefix, + LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); + { + // Print the current structural. + printf("| "); + // Before we begin, the index might point right before the document. + // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 + if (index < iter._root) { + printf("%*s", LOG_BUFFER_LEN, ""); + } else { + auto current_structural = &buf[*index]; + for (int i = 0; i < LOG_BUFFER_LEN; i++) { + printf("%c", printable_char(current_structural[i])); + } + } + printf(" "); + } + { + // Print the next structural. 
+ printf("| "); + auto next_structural = &buf[*(index + 1)]; + for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { + printf("%c", printable_char(next_structural[i])); + } + printf(" "); + } + // printf("| %5u ", *(index+1)); + printf("| %5i ", depth); + printf("| %6.*s ", int(detail.size()), detail.data()); + printf("|\n"); + fflush(stdout); + } + } +} + +} // namespace logger +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +/* end file simdjson/generic/ondemand/logger-inl.h for fallback */ +/* including simdjson/generic/ondemand/object-inl.h for fallback: #include "simdjson/generic/ondemand/object-inl.h" */ +/* begin file simdjson/generic/ondemand/object-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + 
logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { + return find_field_unordered(key); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { + return std::forward(*this).find_field_unordered(key); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} + +simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_object().error() ); + return object(iter); +} +simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_root_object().error() ); 
+ return object(iter); +} +simdjson_inline error_code object::consume() noexcept { + if(iter.is_at_key()) { + /** + * whenever you are pointing at a key, calling skip_child() is + * unsafe because you will hit a string and you will assume that + * it is string value, and this mistake will lead you to make bad + * depth computation. + */ + /** + * We want to 'consume' the key. We could really + * just do _json_iter->return_current_and_advance(); at this + * point, but, for clarity, we will use the high-level API to + * eat the key. We assume that the compiler optimizes away + * most of the work. + */ + simdjson_unused raw_json_string actual_key; + auto error = iter.field_key().get(actual_key); + if (error) { iter.abandon(); return error; }; + // Let us move to the value while we are at it. + if ((error = iter.field_value())) { iter.abandon(); return error; } + } + auto error_skip = iter.json_iter().skip_child(iter.depth()-1); + if(error_skip) { iter.abandon(); } + return error_skip; +} + +simdjson_inline simdjson_result object::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.started_object().error() ); + return object(iter); +} + +simdjson_inline object object::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline object::object(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result object::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return object_iterator(iter); +} +simdjson_inline simdjson_result object::end() noexcept { + return object_iterator(iter); +} + +inline 
simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = find_field(unescaped); // Take note find_field does not unescape keys when matching + } else { + child = find_field(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; +} + +inline simdjson_result object::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { + return INVALID_JSON_POINTER; + } + return at_pointer(json_pointer); +} + +simdjson_inline simdjson_result object::count_fields() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. 
+ if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the object after counting the number of elements. + iter.reset_object(); + return count; +} + +simdjson_inline simdjson_result object::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_object().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +simdjson_inline simdjson_result object::reset() & noexcept { + return iter.reset_object(); +} + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first)[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return 
error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +inline simdjson_result simdjson_result::reset() noexcept { + if (error()) { return error(); } + return first.reset(); +} + +inline simdjson_result simdjson_result::is_empty() noexcept { + if (error()) { return error(); } + return first.is_empty(); +} + +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +/* end file simdjson/generic/ondemand/object-inl.h for fallback */ +/* including simdjson/generic/ondemand/object_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/object_iterator-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/ondemand/field-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +// +// object_iterator +// + +simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result object_iterator::operator*() noexcept { + error_code error = iter.error(); + if (error) { iter.abandon(); return error; } + auto result = field::start(iter); + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (result.error()) { iter.abandon(); } + return result; +} +simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { + return iter.is_open(); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline object_iterator &object_iterator::operator++() noexcept { + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error + + simdjson_unused error_code error; + if ((error = iter.skip_child() )) { return *this; } + + simdjson_unused bool has_value; + if ((error = iter.has_next_field().get(has_value) )) { return *this; }; + return *this; +} +SIMDJSON_POP_DISABLE_WARNINGS + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter.depth. at_start may vary. 
Error is +// always SUCCESS: +// +// - Start: This is the state when the object is first found and the iterator is just past the {. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the , or } before the next value. In this state, +// depth == iter.depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter.depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the object iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an +// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter.depth == depth, and at_start == false. +// +// Errors that occur while reading a field to give to the user (such as when the key is not a +// string or the field is missing a colon) are yielded immediately. Depth is then decremented, +// moving to the Finished state without transitioning through an Error state at all. +// +// ## Terminal State +// +// The terminal state has iter.depth < depth. at_start is always false. 
+// +// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. +// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. +// + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + fallback::ondemand::object_iterator &&value +) noexcept + : implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +// If we're iterating and there is an error, return the error once. 
+simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +// Checks for ']' and ',' +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++first; + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/object_iterator-inl.h for fallback */ +/* including simdjson/generic/ondemand/parser-inl.h for fallback: #include "simdjson/generic/ondemand/parser-inl.h" */ +/* begin file simdjson/generic/ondemand/parser-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +simdjson_inline 
parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity} { +} + +simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + if (new_capacity > max_capacity()) { return CAPACITY; } + if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + + // string_capacity copied from document::allocate + _capacity = 0; + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); +#if SIMDJSON_DEVELOPMENT_CHECKS + start_positions.reset(new (std::nothrow) token_position[new_max_depth]); +#endif + if (implementation) { + SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); + SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + } else { + SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); + } + _capacity = new_capacity; + _max_depth = new_max_depth; + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. 
+ SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return document::start({ reinterpret_cast(json.data()), this }); +} + +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; + } + return document::start({ reinterpret_cast(json.data()), this, true }); +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { + return iterate(padded_string_view(json, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { + if(json.capacity() - json.size() < SIMDJSON_PADDING) { + json.reserve(json.size() + SIMDJSON_PADDING); + } + return iterate(padded_string_view(json)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { + return iterate(padded_string_view(json)); +} + +simdjson_warn_unused simdjson_inline simdjson_result 
parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + padded_string_view json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + const padded_string &json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length()) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return json_iterator(reinterpret_cast(json.data()), this); +} + +inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { + buf += 3; + len -= 3; + } + if(allow_comma_separated && batch_size < len) { batch_size = len; } + return document_stream(*this, buf, len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return 
iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} + +simdjson_inline size_t parser::capacity() const noexcept { + return _capacity; +} +simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_inline size_t parser::max_depth() const noexcept { + return _max_depth; +} + +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; + } +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { + uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { + uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H +/* end file simdjson/generic/ondemand/parser-inl.h for fallback 
*/ +/* including simdjson/generic/ondemand/raw_json_string-inl.h for fallback: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +namespace fallback { +namespace ondemand { + +simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} + +simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } + + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;pos < target.size() && target[pos] != '\\';pos++) {} + // slow path may begin. + bool escaping{false}; + for(;pos < target.size();pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! 
+ for(;target[pos] && target[pos] != '\\';pos++) {} + // slow path may begin. + bool escaping{false}; + for(;target[pos];pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { + // If we are going to call memcmp, then we must know something about the length of the raw_json_string. + return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); +} + +simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + if(target.size() <= SIMDJSON_PADDING) { + return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + } + const char * r{raw()}; + size_t pos{0}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. 
+ return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { + // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. 
+ return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { + return a.unsafe_is_equal(c); +} + +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { + return a == c; +} + +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { + return !(a == c); +} + +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { + return !(a == c); +} + + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { + return iter.unescape(*this, allow_replacement); +} + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { + return iter.unescape_wobbly(*this); +} + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { + bool in_escape = false; + const char *s = str.raw(); + while (true) { + switch (*s) { + case '\\': in_escape = !in_escape; break; + case '"': if (in_escape) { in_escape = false; } else { return out; } break; + default: if (in_escape) { in_escape = false; } + } + out << *s; + s++; + } +} + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { + if (error()) { return error(); } + return 
first.raw(); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(fallback::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { + if (error()) { return error(); } + return first.unescape(iter, allow_replacement); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(fallback::ondemand::json_iterator &iter) const noexcept { + if (error()) { return error(); } + return first.unescape_wobbly(iter); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H +/* end file simdjson/generic/ondemand/raw_json_string-inl.h for fallback */ +/* including simdjson/generic/ondemand/serialization-inl.h for fallback: #include "simdjson/generic/ondemand/serialization-inl.h" */ +/* begin file simdjson/generic/ondemand/serialization-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +inline std::string_view trim(const std::string_view str) noexcept { + // We can almost surely do better by rolling our own find_first_not_of function. 
+ size_t first = str.find_first_not_of(" \t\n\r"); + // If we have the empty string (just white space), then no trimming is possible, and + // we return the empty string_view. + if (std::string_view::npos == first) { return std::string_view(); } + size_t last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, (last - first + 1)); +} + + +inline simdjson_result to_json_string(fallback::ondemand::document& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(fallback::ondemand::document_reference& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(fallback::ondemand::value& x) noexcept { + /** + * If we somehow receive a value that has already been consumed, + * then the following code could be in trouble. E.g., we create + * an array as needed, but if an array was already created, then + * it could be bad. 
+ */ + using namespace fallback::ondemand; + fallback::ondemand::json_type t; + auto error = x.type().get(t); + if(error != SUCCESS) { return error; } + switch (t) + { + case json_type::array: + { + fallback::ondemand::array array; + error = x.get_array().get(array); + if(error) { return error; } + return to_json_string(array); + } + case json_type::object: + { + fallback::ondemand::object object; + error = x.get_object().get(object); + if(error) { return error; } + return to_json_string(object); + } + default: + return trim(x.raw_json_token()); + } +} + +inline simdjson_result to_json_string(fallback::ondemand::object& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(fallback::ondemand::array& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} +} // namespace simdjson + +namespace simdjson { namespace fallback { namespace ondemand { + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + 
return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document_reference& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, 
simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif +}}} // namespace simdjson::fallback::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +/* end file simdjson/generic/ondemand/serialization-inl.h for fallback */ +/* including simdjson/generic/ondemand/token_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/token_iterator-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define 
SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +simdjson_inline token_iterator::token_iterator( + const uint8_t *_buf, + token_position position +) noexcept : buf{_buf}, _position{position} +{ +} + +simdjson_inline uint32_t token_iterator::current_offset() const noexcept { + return *(_position); +} + + +simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { + return &buf[*(_position++)]; +} + +simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; +} +simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; +} +simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { + return *(position+1) - *position; +} + +simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { + return *(position+2) - *(position) > *(position+1) - *(position) ? 
+ *(position+1) - *(position) + : *(position+2) - *(position); +} +simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(_position+delta)]; +} +simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(_position+delta); +} +simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + return *(_position+delta+1) - *(_position+delta); +} + +simdjson_inline token_position token_iterator::position() const noexcept { + return _position; +} +simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; +} + +simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return _position == other._position; +} +simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return _position != other._position; +} +simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return _position > other._position; +} +simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return _position >= other._position; +} +simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return _position < other._position; +} +simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return _position <= other._position; +} + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H +/* end file 
simdjson/generic/ondemand/token_iterator-inl.h for fallback */ +/* including simdjson/generic/ondemand/value-inl.h for fallback: #include "simdjson/generic/ondemand/value-inl.h" */ +/* begin file simdjson/generic/ondemand/value-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +simdjson_inline value::value(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} +simdjson_inline value value::start(const value_iterator &iter) noexcept { + return iter; +} +simdjson_inline value value::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline simdjson_result value::get_array() noexcept { + return array::start(iter); +} +simdjson_inline simdjson_result 
value::get_object() noexcept { + return object::start(iter); +} +simdjson_inline simdjson_result value::start_or_resume_object() noexcept { + if (iter.at_start()) { + return get_object(); + } else { + return object::resume(iter); + } +} + +simdjson_inline simdjson_result value::get_raw_json_string() noexcept { + return iter.get_raw_json_string(); +} +simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { + return iter.get_string(allow_replacement); +} +template +simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { + return iter.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result value::get_wobbly_string() noexcept { + return iter.get_wobbly_string(); +} +simdjson_inline simdjson_result value::get_double() noexcept { + return iter.get_double(); +} +simdjson_inline simdjson_result value::get_double_in_string() noexcept { + return iter.get_double_in_string(); +} +simdjson_inline simdjson_result value::get_uint64() noexcept { + return iter.get_uint64(); +} +simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { + return iter.get_uint64_in_string(); +} +simdjson_inline simdjson_result value::get_int64() noexcept { + return iter.get_int64(); +} +simdjson_inline simdjson_result value::get_int64_in_string() noexcept { + return iter.get_int64_in_string(); +} +simdjson_inline simdjson_result value::get_bool() noexcept { + return iter.get_bool(); +} +simdjson_inline simdjson_result value::is_null() noexcept { + return iter.is_null(); +} +template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result 
value::get() noexcept { return get_number(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } + +template simdjson_inline error_code value::get(T &out) noexcept { + return get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline value::operator T() noexcept(false) { + return get(); +} +simdjson_inline value::operator array() noexcept(false) { + return get_array(); +} +simdjson_inline value::operator object() noexcept(false) { + return get_object(); +} +simdjson_inline value::operator uint64_t() noexcept(false) { + return get_uint64(); +} +simdjson_inline value::operator int64_t() noexcept(false) { + return get_int64(); +} +simdjson_inline value::operator double() noexcept(false) { + return get_double(); +} +simdjson_inline value::operator std::string_view() noexcept(false) { + return get_string(false); +} +simdjson_inline value::operator raw_json_string() noexcept(false) { + return get_raw_json_string(); +} +simdjson_inline value::operator bool() noexcept(false) { + return get_bool(); +} +#endif + +simdjson_inline simdjson_result value::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result value::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result value::count_elements() & noexcept { + simdjson_result answer; + auto a = get_array(); + answer = a.count_elements(); + // count_elements leaves you pointing inside the array, at the first element. + // We need to move back so that the user can create a new array (which requires that + // we point at '['). 
+ iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::count_fields() & noexcept { + simdjson_result answer; + auto a = get_object(); + answer = a.count_fields(); + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::at(size_t index) noexcept { + auto a = get_array(); + return a.at(index); +} + +simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result value::find_field(const char *key) noexcept { + return start_or_resume_object().find_field(key); +} + +simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} + +simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result value::operator[](const char *key) noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline simdjson_result value::type() noexcept { + return iter.type(); +} + +simdjson_inline simdjson_result value::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! 
((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline simdjson_result value::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + + +simdjson_inline bool value::is_negative() noexcept { + return iter.is_negative(); +} + +simdjson_inline simdjson_result value::is_integer() noexcept { + return iter.is_integer(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { + return iter.get_number_type(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { + return iter.get_number(); +} + +simdjson_inline std::string_view value::raw_json_token() noexcept { + return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); +} + +simdjson_inline simdjson_result value::raw_json() noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: { + ondemand::array array; + SIMDJSON_TRY(get_array().get(array)); + return array.raw_json(); + } + case json_type::object: { + ondemand::object object; + SIMDJSON_TRY(get_object().get(object)); + return object.raw_json(); + } + default: + return raw_json_token(); + } +} + +simdjson_inline simdjson_result value::current_location() noexcept { + return iter.json_iter().current_location(); +} + +simdjson_inline int32_t value::current_depth() const noexcept{ + return iter.json_iter().depth(); +} + +inline bool is_pointer_well_formed(std::string_view json_pointer) noexcept { + if (simdjson_unlikely(json_pointer.empty())) { // can't be + return false; + } + if (simdjson_unlikely(json_pointer[0] != '/')) { + return false; + } + size_t escape = json_pointer.find('~'); + if (escape == std::string_view::npos) { + return true; + } + if (escape == json_pointer.size() - 1) { + return false; + } + if (json_pointer[escape + 1] != '0' && json_pointer[escape + 1] != '1') { + 
return false; + } + return true; +} + +simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + // a non-empty string can be invalid, or accessing a primitive (issue 2154) + if (is_pointer_well_formed(json_pointer)) { + return NO_SUCH_FIELD; + } + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result value::at_path(std::string_view json_path) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + fallback::ondemand::value &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + if (error()) { return error(); } + 
return {}; +} + +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} + +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { + if (error()) { return error(); } + return first[key]; +} + +simdjson_inline simdjson_result simdjson_result::get_array() noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + 
return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return first.get(); +} +template simdjson_inline error_code simdjson_result::get(T &out) noexcept { + if (error()) { return error(); } + return first.get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return std::move(first); +} +template<> simdjson_inline error_code simdjson_result::get(fallback::ondemand::value &out) noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return 
error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return static_cast(first); +} +simdjson_inline simdjson_result::operator fallback::ondemand::array() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::object() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} 
+simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer( + std::string_view json_pointer) noexcept { + if (error()) { + return error(); + } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H +/* end file simdjson/generic/ondemand/value-inl.h for fallback */ +/* including simdjson/generic/ondemand/value_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/value_iterator-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped 
(editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +simdjson_inline value_iterator::value_iterator( + json_iterator *json_iter, + depth_t depth, + token_position start_position +) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} +{ +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_root_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { + assert_at_container_start(); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + if (*_json_iter->peek() == '}') { + logger::log_value(*_json_iter, "empty object"); + _json_iter->return_current_and_advance(); + end_container(); + return false; + } + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! 
_json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should + // call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != '}') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); + } + // If the last character is } *and* the first gibberish character is also '}' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed object. 
+ return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { + auto error = check_root_object(); + if(error) { return error; } + return started_object(); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { +#if SIMDJSON_CHECK_EOF + if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } + // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + _json_iter->ascend_to(depth()-1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { + assert_at_next(); + + // It's illegal to call this unless there are more tokens: anything that ends in } or ] is + // obligated to verify there are more tokens if they are not the top level. + switch (*_json_iter->return_current_and_advance()) { + case '}': + logger::log_end_value(*_json_iter, "object"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between object fields"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { + error_code error; + bool has_value; + // + // Initially, the object can be in one of a few different places: + // + // 1. The start of the object, at the first field: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + if (at_first_field()) { + has_value = true; + + // + // 2. 
When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + return false; + + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + if ((error = skip_child() )) { abandon(); return error; } + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + while (has_value) { + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. 
+ if ((error = field_value() )) { abandon(); return error; } + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + //if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); // Skip the value entirely + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + + // If the loop ended, we're out of fields to look at. 
+ return false; +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { + /** + * When find_field_unordered_raw is called, we can either be pointing at the + * first key, pointing outside (at the closing brace) or if a key was matched + * we can be either pointing right afterthe ':' right before the value (that we need skip), + * or we may have consumed the value and we might be at a comma or at the + * final brace (ready for a call to has_next_field()). + */ + error_code error; + bool has_value; + + // First, we scan from that point to the end. + // If we don't find a match, we may loop back around, and scan from the beginning to that point. + token_position search_start = _json_iter->position(); + + // We want to know whether we need to go back to the beginning. + bool at_first = at_first_field(); + /////////////// + // Initially, the object can be in one of a few different places: + // + // 1. At the first key: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + // + if (at_first) { + has_value = true; + + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { + +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + SIMDJSON_TRY(reset_object().get(has_value)); + at_first = true; + // 3. 
When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + // If someone queried a key but they not did access the value, then we are left pointing + // at the ':' and we need to move forward through the value... If the value was + // processed then skip_child() does not move the iterator (but may adjust the depth). + if ((error = skip_child() )) { abandon(); return error; } + search_start = _json_iter->position(); + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + + // After initial processing, we will be in one of two states: + // + // ``` + // // At the beginning of a field + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // At the end of the object + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // ``` + // + // Next, we find a match starting from the current position. + while (has_value) { + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). 
+ if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + // Performance note: it maybe wasteful to rewind to the beginning when there might be + // no other query following. Indeed, it would require reskipping the whole object. + // Instead, you can just stay where you are. If there is a new query, there is always time + // to rewind. + if(at_first) { return false; } + + // If we reach the end without finding a match, search the rest of the fields starting at the + // beginning of the object. 
+ // (We have already run through the object before, so we've already validated its structure. We + // don't check errors in this bit.) + SIMDJSON_TRY(reset_object().get(has_value)); + while (true) { + SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + error = field_value(); SIMDJSON_ASSUME(!error); + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). 
+ SIMDJSON_TRY( skip_child() ); + // If we reached the end of the key-value pair we started from, then we know + // that the key is not there so we return false. We are either right before + // the next comma or the final brace. + if(_json_iter->position() == search_start) { return false; } + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); + // If we make the mistake of exiting here, then we could be left pointing at a key + // in the middle of an object. That's not an allowable state. + } + // If the loop ended, we're out of fields to look at. The program should + // never reach this point. + return false; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { + assert_at_next(); + + const uint8_t *key = _json_iter->return_current_and_advance(); + if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } + return raw_json_string(key); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { + assert_at_next(); + + if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } + _json_iter->descend_to(depth()+1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_root_array(); +} + +inline std::string value_iterator::to_string() const noexcept { + auto answer = 
std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); + if(_json_iter != nullptr) { answer += _json_iter->to_string(); } + answer += std::string(" ]"); + return answer; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { + assert_at_container_start(); + if (*_json_iter->peek() == ']') { + logger::log_value(*_json_iter, "empty array"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY( end_container() ); + return false; + } + _json_iter->descend_to(depth()+1); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should + // also call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != ']') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); + } + // If the last character is ] *and* the first gibberish character is also ']' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. 
+ if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed array. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { + auto error = check_root_array(); + if (error) { return error; } + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { + assert_at_next(); + + logger::log_event(*this, "has_next_element"); + switch (*_json_iter->return_current_and_advance()) { + case ']': + logger::log_end_value(*_json_iter, "array"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + _json_iter->descend_to(depth()+1); + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between array elements"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { + auto not_true = atomparsing::str4ncmp(json, "true"); + auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); + bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 
5 : 4]); + if (error) { return incorrect_type_error("Not a boolean"); } + return simdjson_result(!not_true); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { + bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); + // if we start with 'n', we must be a null + if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } + return is_null_string; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { + return get_raw_json_string().unescape(json_iter(), allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_string(allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { + return get_raw_json_string().unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { + auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { + auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return 
result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { + auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { + auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { + auto result = numberparsing::parse_double(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { + auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { + auto result = parse_bool(peek_non_root_scalar("bool")); + if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } + return result; +} +simdjson_inline simdjson_result value_iterator::is_null() noexcept { + bool is_null_value; + SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); + if(is_null_value) { advance_non_root_scalar("null"); } + return is_null_value; +} +simdjson_inline bool value_iterator::is_negative() noexcept { + return numberparsing::is_negative(peek_non_root_scalar("numbersign")); +} +simdjson_inline bool value_iterator::is_root_negative() noexcept { + return numberparsing::is_negative(peek_root_scalar("numbersign")); +} +simdjson_inline simdjson_result value_iterator::is_integer() noexcept { + return 
numberparsing::is_integer(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { + return numberparsing::get_number_type(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number() noexcept { + number num; + error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); + if(error) { return error; } + return num; +} + +simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("is_root_integer"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + return false; // if there are more than 20 characters, it cannot be represented as an integer. + } + auto answer = numberparsing::is_integer(tmpbuf); + // If the parsing was a success, we must still check that it is + // a single scalar. Note that we parse first because of cases like '[]' where + // getting TRAILING_CONTENT is wrong. + if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } + return answer; +} + +simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return number_type::big_integer; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; + } + auto answer = numberparsing::get_number_type(tmpbuf); + if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + return answer; +} +simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return BIGINT_ERROR; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; + } + number num; + error_code error = numberparsing::parse_number(tmpbuf, num); + if(error) { return error; } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("number"); + return num; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { + return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_root_string(check_trailing, allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { + return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) 
noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("bool"); + uint8_t tmpbuf[5+1+1]; // +1 for null termination + tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } + auto result = parse_bool(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + } + return result; +} +simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("null"); + bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + if(result) { // we have something that looks like a null. + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("null"); + } + return result; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); + + return _json_iter->skip_child(depth()); +} + +simdjson_inline value_iterator value_iterator::child() const noexcept { + assert_at_child(); + return { _json_iter, depth()+1, _json_iter->token.position() }; +} + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is +// marked non-inline. 
+SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline bool value_iterator::is_open() const noexcept { + return _json_iter->depth() >= depth(); +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool value_iterator::at_end() const noexcept { + return _json_iter->at_end(); +} + +simdjson_inline bool value_iterator::at_start() const noexcept { + return _json_iter->token.position() == start_position(); +} + +simdjson_inline bool value_iterator::at_first_field() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + return _json_iter->token.position() == start_position() + 1; +} + +simdjson_inline void value_iterator::abandon() noexcept { + _json_iter->abandon(); +} + +simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { + return _depth; +} +simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { + return _json_iter->error; +} +simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { + return _json_iter->string_buf_loc(); +} +simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { + return *_json_iter; +} +simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { + return *_json_iter; +} + +simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { + return _json_iter->peek(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { + return _json_iter->peek_length(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { + return _json_iter->peek_root_length(start_position()); +} + +simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. 
+ if (!is_at_start()) { return peek_start(); } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return; } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { + logger::log_start_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + const uint8_t *json; + if (!is_at_start()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + json = peek_start(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + } else { + assert_at_start(); + /** + * We should be prudent. Let us peek. If it is not the right type, we + * return an error. Only once we have determined that we have the right + * type are we allowed to advance! 
+ */ + json = _json_iter->peek(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + _json_iter->return_current_and_advance(); + } + + + return SUCCESS; +} + + +simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_root(); + return _json_iter->peek(); +} +simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_non_root_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_root(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} +simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_non_root_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { + logger::log_error(*_json_iter, start_position(), depth(), message); + return INCORRECT_TYPE; +} + +simdjson_inline bool value_iterator::is_at_start() const noexcept { + return position() == start_position(); +} + +simdjson_inline bool value_iterator::is_at_key() const noexcept { + // Keys are at the same depth as the object. + // Note here that we could be safer and check that we are within an object, + // but we do not. 
+ return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; +} + +simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { + // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). + auto delta = position() - start_position(); + return delta == 1 || delta == 2; +} + +inline void value_iterator::assert_at_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_container_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_next() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +simdjson_inline void value_iterator::move_at_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position); +} + +simdjson_inline void value_iterator::move_at_container_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position + 1); +} + +simdjson_inline simdjson_result value_iterator::reset_array() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_array(); +} + +simdjson_inline simdjson_result value_iterator::reset_object() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_object(); +} + +inline void value_iterator::assert_at_child() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_root() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( 
_depth == 1 ); +} + +inline void value_iterator::assert_at_non_root_start() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth > 1 ); +} + +inline void value_iterator::assert_is_valid() const noexcept { + SIMDJSON_ASSUME( _json_iter != nullptr ); +} + +simdjson_inline bool value_iterator::is_valid() const noexcept { + return _json_iter != nullptr; +} + +simdjson_inline simdjson_result value_iterator::type() const noexcept { + switch (*peek_start()) { + case '{': + return json_type::object; + case '[': + return json_type::array; + case '"': + return json_type::string; + case 'n': + return json_type::null; + case 't': case 'f': + return json_type::boolean; + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return json_type::number; + default: + return TAPE_ERROR; + } +} + +simdjson_inline token_position value_iterator::start_position() const noexcept { + return _start_position; +} + +simdjson_inline token_position value_iterator::position() const noexcept { + return _json_iter->position(); +} + +simdjson_inline token_position value_iterator::end_position() const noexcept { + return _json_iter->end_position(); +} + +simdjson_inline token_position value_iterator::last_position() const noexcept { + return _json_iter->last_position(); +} + +simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { + return _json_iter->report_error(error, message); +} + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +/* end file 
simdjson/generic/ondemand/value_iterator-inl.h for fallback */ +/* end file simdjson/generic/ondemand/amalgamated.h for fallback */ +/* including simdjson/fallback/end.h: #include "simdjson/fallback/end.h" */ +/* begin file simdjson/fallback/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* undefining SIMDJSON_IMPLEMENTATION from "fallback" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/fallback/end.h */ + +#endif // SIMDJSON_FALLBACK_ONDEMAND_H +/* end file simdjson/fallback/ondemand.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(haswell) +/* including simdjson/haswell/ondemand.h: #include "simdjson/haswell/ondemand.h" */ +/* begin file simdjson/haswell/ondemand.h */ +#ifndef SIMDJSON_HASWELL_ONDEMAND_H +#define SIMDJSON_HASWELL_ONDEMAND_H + +/* including simdjson/haswell/begin.h: #include "simdjson/haswell/begin.h" */ +/* begin file simdjson/haswell/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "haswell" */ +#define SIMDJSON_IMPLEMENTATION haswell + +/* including simdjson/haswell/base.h: #include "simdjson/haswell/base.h" */ +/* begin file simdjson/haswell/base.h */ +#ifndef SIMDJSON_HASWELL_BASE_H +#define SIMDJSON_HASWELL_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL +namespace simdjson { +/** + * Implementation for Haswell (Intel AVX2). 
+ */ +namespace haswell { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BASE_H +/* end file simdjson/haswell/base.h */ +/* including simdjson/haswell/intrinsics.h: #include "simdjson/haswell/intrinsics.h" */ +/* begin file simdjson/haswell/intrinsics.h */ +#ifndef SIMDJSON_HASWELL_INTRINSICS_H +#define SIMDJSON_HASWELL_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdjson, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. 
+ */ +#include // for _blsr_u64 +#include // for __lzcnt64 +#include // for most things (AVX2, AVX512, _popcnt64) +#include +#include +#include +#include +#include // for _mm_clmulepi64_si128 +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. +#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO + +static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for haswell kernel."); + +#endif // SIMDJSON_HASWELL_INTRINSICS_H +/* end file simdjson/haswell/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL +SIMDJSON_TARGET_REGION("avx2,bmi,pclmul,lzcnt,popcnt") +#endif + +/* including simdjson/haswell/bitmanipulation.h: #include "simdjson/haswell/bitmanipulation.h" */ +/* begin file simdjson/haswell/bitmanipulation.h */ +#ifndef SIMDJSON_HASWELL_BITMANIPULATION_H +#define SIMDJSON_HASWELL_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmask.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. 
+// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? + //////// + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return _blsr_u64(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return int(_lzcnt_u64(input_num)); +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BITMANIPULATION_H +/* end file simdjson/haswell/bitmanipulation.h */ +/* including simdjson/haswell/bitmask.h: #include "simdjson/haswell/bitmask.h" */ +/* begin file simdjson/haswell/bitmask.h */ +#ifndef SIMDJSON_HASWELL_BITMASK_H +#define SIMDJSON_HASWELL_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): 
#include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BITMASK_H +/* end file simdjson/haswell/bitmask.h */ +/* including simdjson/haswell/numberparsing_defs.h: #include "simdjson/haswell/numberparsing_defs.h" */ +/* begin file simdjson/haswell/numberparsing_defs.h */ +#ifndef SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H +#define SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace numberparsing { + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. 
+ const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace haswell +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H +/* end file simdjson/haswell/numberparsing_defs.h */ +/* including simdjson/haswell/simd.h: #include "simdjson/haswell/simd.h" */ +/* begin file simdjson/haswell/simd.h */ +#ifndef SIMDJSON_HASWELL_SIMD_H +#define SIMDJSON_HASWELL_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ 
+/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m256i value; + + // Zero constructor + simdjson_inline base() : value{__m256i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m256i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m256i&() const { return this->value; } + simdjson_inline operator __m256i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm256_or_si256(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. 
+ template + struct simd8; + + template> + struct base8: base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m256i _value) : base>(_value) {} + + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm256_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm256_alignr_epi8(*this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m256i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm256_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm256_testz_si256(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm256_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm256_setzero_si256(); } + static simdjson_inline simd8 load(const T values[32]) { + return _mm256_loadu_si256(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline 
base8_numeric(const __m256i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[32]) const { return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm256_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm256_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm256_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint32_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in four steps, first 8 bytes and then second 8 bytes... 
+ uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits + uint8_t mask3 = uint8_t(mask >> 16); // ... + uint8_t mask4 = uint8_t(mask >> 24); // ... + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m256i shufmask = _mm256_set_epi64x(thintable_epi8[mask4], thintable_epi8[mask3], + thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask and so forth + shufmask = + _mm256_add_epi8(shufmask, _mm256_set_epi32(0x18181818, 0x18181818, + 0x10101010, 0x10101010, 0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m256i pruned = _mm256_shuffle_epi8(*this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + int pop3 = BitsSetTable256mul2[mask3]; + + // then load the corresponding mask + // could be done with _mm256_loadu2_m128i but many standard libraries omit this intrinsic. + __m256i v256 = _mm256_castsi128_si256( + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8))); + __m256i compactmask = _mm256_insertf128_si256(v256, + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop3 * 8)), 1); + __m256i almostthere = _mm256_shuffle_epi8(pruned, compactmask); + // We just need to write out the result. + // This is the tricky bit that is hard to do + // if we want to return a SIMD register, since there + // is no single-instruction approach to recombine + // the two 128-bit lanes with an offset. 
+ __m128i v128; + v128 = _mm256_castsi256_si128(almostthere); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output), v128); + v128 = _mm256_extractf128_si256(almostthere, 1); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output + 16 - count_ones(mask & 0xFFFF)), v128); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 + ) : simd8(_mm256_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, 
int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm256_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm256_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 + ) : simd8(_mm256_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t 
v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm256_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm256_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epu8(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); } + simdjson_inline 
bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm256_testz_si256(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 2, "Haswell kernel should use two registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint32_t mask1 = uint32_t(mask); + uint32_t mask2 = uint32_t(mask >> 32); + this->chunks[0].compress(mask1, output); + this->chunks[1].compress(mask2, output + 32 - count_ones(mask1)); + return 64 - count_ones(mask); + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r_hi = this->chunks[1].to_bitmask(); + return r_lo | (r_hi << 32); + } + + simdjson_inline simd8 
reduce_or() const { + return this->chunks[0] | this->chunks[1]; + } + + simdjson_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] | mask, + this->chunks[1] | mask + ); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_SIMD_H +/* end file simdjson/haswell/simd.h */ +/* including simdjson/haswell/stringparsing_defs.h: #include "simdjson/haswell/stringparsing_defs.h" */ +/* begin file simdjson/haswell/stringparsing_defs.h */ +#ifndef SIMDJSON_HASWELL_STRINGPARSING_DEFS_H +#define SIMDJSON_HASWELL_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 15 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + // store to dest unconditionally - we can overwrite the bits we don't like later + v.store(dst); + return { + static_cast((v == '\\').to_bitmask()), // bs_bits + static_cast((v == '"').to_bitmask()), // quote_bits + }; +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_STRINGPARSING_DEFS_H +/* end file simdjson/haswell/stringparsing_defs.h */ +/* end file simdjson/haswell/begin.h */ +/* including simdjson/generic/ondemand/amalgamated.h for haswell: #include "simdjson/generic/ondemand/amalgamated.h" */ +/* begin file simdjson/generic/ondemand/amalgamated.h for haswell */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H) +#error simdjson/generic/ondemand/dependencies.h must be included before simdjson/generic/ondemand/amalgamated.h! 
+#endif + +// Stuff other things depend on +/* including simdjson/generic/ondemand/base.h for haswell: #include "simdjson/generic/ondemand/base.h" */ +/* begin file simdjson/generic/ondemand/base.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +/** + * A fast, simple, DOM-like interface that parses JSON as you use it. + * + * Designed for maximum speed and a lower memory profile. + */ +namespace ondemand { + +/** Represents the depth of a JSON value (number of nested arrays/objects). */ +using depth_t = int32_t; + +/** @copydoc simdjson::haswell::number_type */ +using number_type = simdjson::haswell::number_type; + +/** @private Position in the JSON buffer indexes */ +using token_position = const uint32_t *; + +class array; +class array_iterator; +class document; +class document_reference; +class document_stream; +class field; +class json_iterator; +enum class json_type; +struct number; +class object; +class object_iterator; +class parser; +class raw_json_string; +class token_iterator; +class value; +class value_iterator; + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_BASE_H +/* end file simdjson/generic/ondemand/base.h for haswell */ +/* including simdjson/generic/ondemand/value_iterator.h for haswell: #include "simdjson/generic/ondemand/value_iterator.h" */ +/* begin file simdjson/generic/ondemand/value_iterator.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define 
SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +/** + * Iterates through a single JSON value at a particular depth. + * + * Does not keep track of the type of value: provides methods for objects, arrays and scalars and expects + * the caller to call the right ones. + * + * @private This is not intended for external use. + */ +class value_iterator { +protected: + /** The underlying JSON iterator */ + json_iterator *_json_iter{}; + /** The depth of this value */ + depth_t _depth{}; + /** + * The starting token index for this value + */ + token_position _start_position{}; + +public: + simdjson_inline value_iterator() noexcept = default; + + /** + * Denote that we're starting a document. + */ + simdjson_inline void start_document() noexcept; + + /** + * Skips a non-iterated or partially-iterated JSON value, whether it is a scalar, array or object. + * + * Optimized for scalars. + */ + simdjson_warn_unused simdjson_inline error_code skip_child() noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is at the start of the value + */ + simdjson_inline bool at_start() const noexcept; + + /** + * Tell whether the value is open--if the value has not been used, or the array/object is still open. + */ + simdjson_inline bool is_open() const noexcept; + + /** + * Tell whether the value is at an object's first field (just after the {). + */ + simdjson_inline bool at_first_field() const noexcept; + + /** + * Abandon all iteration. + */ + simdjson_inline void abandon() noexcept; + + /** + * Get the child value as a value_iterator. 
+ */ + simdjson_inline value_iterator child_value() const noexcept; + + /** + * Get the depth of this value. + */ + simdjson_inline int32_t depth() const noexcept; + + /** + * Get the JSON type of this value. + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() const noexcept; + + /** + * @addtogroup object Object iteration + * + * Methods to iterate and find object fields. These methods generally *assume* the value is + * actually an object; the caller is responsible for keeping track of that fact. + * + * @{ + */ + + /** + * Start an object iteration. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + */ + simdjson_warn_unused simdjson_inline simdjson_result start_object() noexcept; + /** + * Start an object iteration from the root. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_object() noexcept; + /** + * Checks whether an object could be started from the root. May be called by start_root_object. + * + * @returns SUCCESS if it is possible to safely start an object from the root (document level). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_object() noexcept; + /** + * Start an object iteration after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). 
+ */ + simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; + /** + * Start an object iteration from the root, after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; + + /** + * Moves to the next field in an object. + * + * Looks for , and }. If } is found, the object is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return whether there is another field in the object. + * @error TAPE_ERROR If there is a comma missing between fields. + * @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_field() noexcept; + + /** + * Get the current field's key. + */ + simdjson_warn_unused simdjson_inline simdjson_result field_key() noexcept; + + /** + * Pass the : in the field and move to its value. + */ + simdjson_warn_unused simdjson_inline error_code field_value() noexcept; + + /** + * Find the next field with the given key. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). 
+ */ + simdjson_warn_unused simdjson_inline error_code find_field(const std::string_view key) noexcept; + + /** + * Find the next field with the given key, *without* unescaping. This assumes object order: it + * will not find the field if it was already passed when looking for some *other* field. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_raw(const std::string_view key) noexcept; + + /** + * Find the field with the given key without regard to order, and *without* unescaping. + * + * This is an unordered object lookup: if the field is not found initially, it will cycle around and scan from the beginning. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept; + + /** @} */ + + /** + * @addtogroup array Array iteration + * Methods to iterate over array elements. These methods generally *assume* the value is actually + * an object; the caller is responsible for keeping track of that fact. + * @{ + */ + + /** + * Check for an opening [ and start an array iteration. 
+ * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + */ + simdjson_warn_unused simdjson_inline simdjson_result start_array() noexcept; + /** + * Check for an opening [ and start an array iteration while at the root. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_array() noexcept; + /** + * Checks whether an array could be started from the root. May be called by start_root_array. + * + * @returns SUCCESS if it is possible to safely start an array from the root (document level). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_array() noexcept; + /** + * Start an array iteration, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; + /** + * Start an array iteration from the root, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; + + /** + * Moves to the next element in an array. + * + * Looks for , and ]. 
If ] is found, the array is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return Whether there is another element in the array. + * @error TAPE_ERROR If there is a comma missing between elements. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_element() noexcept; + + /** + * Get a child value iterator. + */ + simdjson_warn_unused simdjson_inline value_iterator child() const noexcept; + + /** @} */ + + /** + * @defgroup scalar Scalar values + * @addtogroup scalar + * @{ + */ + + simdjson_warn_unused simdjson_inline simdjson_result get_string(bool allow_replacement) noexcept; + template + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_bool() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_null() noexcept; + simdjson_warn_unused simdjson_inline bool is_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_integer() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + simdjson_warn_unused simdjson_inline simdjson_result get_root_string(bool 
check_trailing, bool allow_replacement) noexcept; + template + simdjson_warn_unused simdjson_inline error_code get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_wobbly_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_raw_json_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_bool(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline bool is_root_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_integer(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number_type(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; + + simdjson_inline error_code error() const noexcept; + simdjson_inline uint8_t *&string_buf_loc() noexcept; + simdjson_inline const json_iterator &json_iter() const noexcept; + simdjson_inline json_iterator &json_iter() noexcept; + + simdjson_inline void assert_is_valid() const noexcept; + simdjson_inline bool is_valid() const noexcept; + + /** @} 
*/ +protected: + /** + * Restarts an array iteration. + * @returns Whether the array has any elements (returns false for empty). + */ + simdjson_inline simdjson_result reset_array() noexcept; + /** + * Restarts an object iteration. + * @returns Whether the object has any fields (returns false for empty). + */ + simdjson_inline simdjson_result reset_object() noexcept; + /** + * move_at_start(): moves us so that we are pointing at the beginning of + * the container. It updates the index so that at_start() is true and it + * syncs the depth. The user can then create a new container instance. + * + * Usage: used with value::count_elements(). + **/ + simdjson_inline void move_at_start() noexcept; + + /** + * move_at_container_start(): moves us so that we are pointing at the beginning of + * the container so that assert_at_container_start() passes. + * + * Usage: used with reset_array() and reset_object(). + **/ + simdjson_inline void move_at_container_start() noexcept; + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + simdjson_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept; + + simdjson_inline simdjson_result parse_null(const uint8_t *json) const noexcept; + simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; + simdjson_inline const uint8_t *peek_start() const noexcept; + simdjson_inline uint32_t peek_start_length() const noexcept; + simdjson_inline uint32_t peek_root_length() const noexcept; + + /** + * The general idea of the advance_... methods and the peek_* methods + * is that you first peek and check that you have desired type. If you do, + * and only if you do, then you advance. + * + * We used to unconditionally advance. But this made reasoning about our + * current state difficult. + * Suppose you always advance. Look at the 'value' matching the key + * "shadowable" in the following example... 
+ * + * ({"globals":{"a":{"shadowable":[}}}}) + * + * If the user thinks it is a Boolean and asks for it, then we check the '[', + * decide it is not a Boolean, but still move into the next character ('}'). Now + * we are left pointing at '}' right after a '['. And we have not yet reported + * an error, only that we do not have a Boolean. + * + * If, instead, you just stand your ground until it is content that you know, then + * you will only even move beyond the '[' if the user tells you that you have an + * array. So you will be at the '}' character inside the array and, hopefully, you + * will then catch the error because an array cannot start with '}', but the code + * processing Boolean values does not know this. + * + * So the contract is: first call 'peek_...' and then call 'advance_...' only + * if you have determined that it is a type you can handle. + * + * Unfortunately, it makes the code more verbose, longer and maybe more error prone. + */ + + simdjson_inline void advance_scalar(const char *type) noexcept; + simdjson_inline void advance_root_scalar(const char *type) noexcept; + simdjson_inline void advance_non_root_scalar(const char *type) noexcept; + + simdjson_inline const uint8_t *peek_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_root_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; + + + simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; + simdjson_inline error_code end_container() noexcept; + + /** + * Advance to a place expecting a value (increasing depth). + * + * @return The current token (the one left behind). + * @error TAPE_ERROR If the document ended early. 
+ */ + simdjson_inline simdjson_result advance_to_value() noexcept; + + simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; + simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; + + simdjson_inline bool is_at_start() const noexcept; + /** + * is_at_iterator_start() returns true on an array or object after it has just been + * created, whether the instance is empty or not. + * + * Usage: used by array::begin() in debug mode (SIMDJSON_DEVELOPMENT_CHECKS) + */ + simdjson_inline bool is_at_iterator_start() const noexcept; + + /** + * Assuming that we are within an object, this returns true if we + * are pointing at a key. + * + * Usage: the skip_child() method should never be used while we are pointing + * at a key inside an object. + */ + simdjson_inline bool is_at_key() const noexcept; + + inline void assert_at_start() const noexcept; + inline void assert_at_container_start() const noexcept; + inline void assert_at_root() const noexcept; + inline void assert_at_child() const noexcept; + inline void assert_at_next() const noexcept; + inline void assert_at_non_root_start() const noexcept; + + /** Get the starting position of this value */ + simdjson_inline token_position start_position() const noexcept; + + /** @copydoc error_code json_iterator::position() const noexcept; */ + simdjson_inline token_position position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position last_position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position end_position() const noexcept; + /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + friend class document; + friend class object; + friend class array; + friend class value; + friend 
class field; +}; // value_iterator + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(haswell::ondemand::value_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H +/* end file simdjson/generic/ondemand/value_iterator.h for haswell */ +/* including simdjson/generic/ondemand/value.h for haswell: #include "simdjson/generic/ondemand/value.h" */ +/* begin file simdjson/generic/ondemand/value.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +/** + * An ephemeral JSON value returned during iteration. It is only valid for as long as you do + * not access more data in the JSON document. + */ +class value { +public: + /** + * Create a new invalid value. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline value() noexcept = default; + + /** + * Get this value as the given type. 
+ * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_inline simdjson_result get() noexcept { + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_inline error_code get(T &out) noexcept; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() noexcept; + + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields.
+ * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() noexcept; + + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + + /** + * Cast this JSON value (inside string) to a unsigned integer. + * + * @returns A unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64() noexcept; + + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. 
+ * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + + /** + * Attempts to fill the provided std::string reference with the parsed value of the current string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. + * We recommend you avoid allocating an std::string unless you need to. + * + * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. + */ + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + + /** + * Cast this JSON value to a "wobbly" string. + * + * The string may not be a valid UTF-8 string. + * See https://simonsapin.github.io/wtf-8/ + * + * Important: a value should be consumed once. Calling get_wobbly_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false.
+ */ + simdjson_inline simdjson_result get_bool() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null. + * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.). + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() noexcept(false); + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer.
+ */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); +#endif + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + * + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. 
After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * Performance hint: You should only call count_elements() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method on the object instance. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; + /** + * Look up a field by name on an object (order-sensitive). 
+ * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. 
+ * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field was not there when they are not in order). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @return The type of JSON value (json_type::array, json_type::object, json_type::string, + * json_type::number, json_type::boolean, or json_type::null). + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the value is a scalar (string, number, null, Boolean). + * Returns false when it is an array or object.
+ * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + /** + * Checks whether the value is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; + + /** + * Checks whether the value is a negative number. + * + * @returns true if the number is negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the value is an integer number. Note that + * this requires to partially parse the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * Performance note: if you call this function systematically + * before parsing a number, you may have fallen for a performance + * anti-pattern. + * + * @returns true if the number is an integer. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808. + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808.
+ * get_number_type() is number_type::big_integer for integers that do not fit in 64 bits, + * in which case the digit_count is set to the length of the big integer string. + * Otherwise, get_number_type() has value number_type::floating_point_number. + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number. An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * For integers that do not fit in 64 bits, the function returns BIGINT_ERROR error code. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + * + * Performance note: this is designed with performance in mind.
When + * calling 'get_number()', you scan the number string only once, determining + * efficiently the type and storing it in an efficient manner. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. However, if this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view is guaranteed to be + * a non-space token. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". + * - -1.2e-100 + * - true + * - false + * - null + * + * See also value::raw_json(). + */ + simdjson_inline std::string_view raw_json_token() noexcept; + + /** + * Get a string_view pointing at this value in the JSON document. + * If this element is an array or an object, it consumes the array or the object + * and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document. + * If this element is a scalar (string, number, Boolean, null), it returns what + * raw_json_token() would return. + */ + simdjson_inline simdjson_result raw_json() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + simdjson_inline simdjson_result current_location() noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. 
+ */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. + * + * Calling at_pointer() on non-document instances (e.g., arrays and objects) is not + * standardized (by RFC 6901). We provide some experimental support for JSON pointers + * on non-document instances. Yet it is not the case when calling at_pointer on an array + * or an object instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. 
+ * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that are trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view at_path) noexcept; + + +protected: + /** + * Create a value. + */ + simdjson_inline value(const value_iterator &iter) noexcept; + + /** + * Skip this value, allowing iteration to continue. + */ + simdjson_inline void skip() noexcept; + + /** + * Start a value at the current position. + * + * (It should already be started; this is just a self-documentation method.) + */ + static simdjson_inline value start(const value_iterator &iter) noexcept; + + /** + * Resume a value.
+ */ + static simdjson_inline value resume(const value_iterator &iter) noexcept; + + /** + * Get the object, starting or resuming it as necessary + */ + simdjson_inline simdjson_result start_or_resume_object() noexcept; + + // simdjson_inline void log_value(const char *type) const noexcept; + // simdjson_inline void log_error(const char *message) const noexcept; + + value_iterator iter{}; + + friend class document; + friend class array_iterator; + friend class field; + friend class object; + friend struct simdjson_result; + friend struct simdjson_result; + friend class field; +}; + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(haswell::ondemand::value &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result get_array() noexcept; + simdjson_inline simdjson_result get_object() noexcept; + + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() 
noexcept; + + template simdjson_inline error_code get(T &out) noexcept; + +#if SIMDJSON_EXCEPTIONS + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator haswell::ondemand::array() noexcept(false); + simdjson_inline operator haswell::ondemand::object() noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator haswell::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
+ */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field was not there when they are not in order). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object.
+ */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + */ + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + + /** @copydoc simdjson_inline std::string_view value::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + + /** @copydoc simdjson_inline simdjson_result current_location() noexcept */ + simdjson_inline simdjson_result current_location() noexcept; + /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + +} // namespace simdjson + +#endif 
// SIMDJSON_GENERIC_ONDEMAND_VALUE_H +/* end file simdjson/generic/ondemand/value.h for haswell */ +/* including simdjson/generic/ondemand/logger.h for haswell: #include "simdjson/generic/ondemand/logger.h" */ +/* begin file simdjson/generic/ondemand/logger.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +// Logging should be free unless SIMDJSON_VERBOSE_LOGGING is set. Importantly, it is critical +// that the call to the log functions be side-effect free. Thus, for example, you should not +// create temporary std::string instances. +namespace logger { + +enum class log_level : int32_t { + info = 0, + error = 1 +}; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + +// We do not want these functions to be 'really inlined' since real inlining is +// for performance purposes and if you are using the loggers, you do not care about +// performance (or should not). +static inline void log_headers() noexcept; +// If args are provided, title will be treated as format string +template +static inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +template +static inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... 
args) noexcept; +static inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; + +static inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail="") noexcept; +static inline void log_error(const json_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +static inline void log_event(const value_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const value_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_error(const value_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +} // namespace logger +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_H +/* end file 
simdjson/generic/ondemand/logger.h for haswell */ +/* including simdjson/generic/ondemand/token_iterator.h for haswell: #include "simdjson/generic/ondemand/token_iterator.h" */ +/* begin file simdjson/generic/ondemand/token_iterator.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +/** + * Iterates through JSON tokens (`{` `}` `[` `]` `,` `:` `""` `123` `true` `false` `null`) + * detected by stage 1. + * + * @private This is not intended for external use. + */ +class token_iterator { +public: + /** + * Create a new invalid token_iterator. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline token_iterator() noexcept = default; + simdjson_inline token_iterator(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator &operator=(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator(const token_iterator &other) noexcept = default; + simdjson_inline token_iterator &operator=(const token_iterator &other) noexcept = default; + + /** + * Advance to the next token (returning the current one). + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + /** + * Reports the current offset in bytes from the start of the underlying buffer. 
+ */ + simdjson_inline uint32_t current_offset() const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. + * + */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a root token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token (start of the document). + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; + /** + * Return the current index. + */ + simdjson_inline token_position position() const noexcept; + /** + * Reset to a previously saved index. 
+ */ + simdjson_inline void set_position(token_position target_position) noexcept; + + // NOTE: we don't support a full C++ iterator interface, because we expect people to make + // different calls to advance the iterator based on *their own* state. + + simdjson_inline bool operator==(const token_iterator &other) const noexcept; + simdjson_inline bool operator!=(const token_iterator &other) const noexcept; + simdjson_inline bool operator>(const token_iterator &other) const noexcept; + simdjson_inline bool operator>=(const token_iterator &other) const noexcept; + simdjson_inline bool operator<(const token_iterator &other) const noexcept; + simdjson_inline bool operator<=(const token_iterator &other) const noexcept; + +protected: + simdjson_inline token_iterator(const uint8_t *buf, token_position position) noexcept; + + /** + * Get the index of the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_index(int32_t delta=0) const noexcept; + /** + * Get the index of the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. + * + */ + simdjson_inline uint32_t peek_index(token_position position) const noexcept; + + const uint8_t *buf{}; + token_position _position{}; + + friend class json_iterator; + friend class value_iterator; + friend class object; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... 
args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +}; + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(haswell::ondemand::token_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H +/* end file simdjson/generic/ondemand/token_iterator.h for haswell */ +/* including simdjson/generic/ondemand/json_iterator.h for haswell: #include "simdjson/generic/ondemand/json_iterator.h" */ +/* begin file simdjson/generic/ondemand/json_iterator.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +/** + * Iterates through JSON tokens, keeping track of depth and string buffer. + * + * @private This is not intended for external use. 
+ */ +class json_iterator { +protected: + token_iterator token{}; + ondemand::parser *parser{}; + /** + * Next free location in the string buffer. + * + * Used by raw_json_string::unescape() to have a place to unescape strings to. + */ + uint8_t *_string_buf_loc{}; + /** + * JSON error, if there is one. + * + * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. + * + * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first + * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If + * this is not elided, we should make sure it's at least not using up a register. Failing that, + * we should store it in document so there's only one of them. + */ + error_code error{SUCCESS}; + /** + * Depth of the current token in the JSON. + * + * - 0 = finished with document + * - 1 = document root value (could be [ or {, not yet known) + * - 2 = , or } inside root array/object + * - 3 = key or value inside root array/object. + */ + depth_t _depth{}; + /** + * Beginning of the document indexes. + * Normally we have root == parser->implementation->structural_indexes.get() + * but this may differ, especially in streaming mode (where we have several + * documents); + */ + token_position _root{}; + /** + * Normally, a json_iterator operates over a single document, but in + * some cases, we may have a stream of documents. This attribute is meant + * as meta-data: the json_iterator works the same irrespective of the + * value of this attribute. 
+ */ + bool _streaming{false}; + +public: + simdjson_inline json_iterator() noexcept = default; + simdjson_inline json_iterator(json_iterator &&other) noexcept; + simdjson_inline json_iterator &operator=(json_iterator &&other) noexcept; + simdjson_inline explicit json_iterator(const json_iterator &other) noexcept = default; + simdjson_inline json_iterator &operator=(const json_iterator &other) noexcept = default; + /** + * Skips a JSON value, whether it is a scalar, array or object. + */ + simdjson_warn_unused simdjson_inline error_code skip_child(depth_t parent_depth) noexcept; + + /** + * Tell whether the iterator is still at the start + */ + simdjson_inline bool at_root() const noexcept; + + /** + * Tell whether we should be expected to run in streaming + * mode (iterating over many documents). It is pure metadata + * that does not affect how the iterator works. It is used by + * start_root_array() and start_root_object(). + */ + simdjson_inline bool streaming() const noexcept; + + /** + * Get the root value iterator + */ + simdjson_inline token_position root_position() const noexcept; + /** + * Assert that we are at the document depth (== 1) + */ + simdjson_inline void assert_at_document_depth() const noexcept; + /** + * Assert that we are at the root of the document + */ + simdjson_inline void assert_at_root() const noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is live (has not been moved). + */ + simdjson_inline bool is_alive() const noexcept; + + /** + * Abandon this iterator, setting depth to 0 (as if the document is finished). + */ + simdjson_inline void abandon() noexcept; + + /** + * Advance the current token without modifying depth. + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + + /** + * Returns true if there is a single token in the index (i.e., it is + * a JSON with a scalar value such as a single number). 
+ * + * @return whether there is a single token + */ + simdjson_inline bool is_single_token() const noexcept; + + /** + * Assert that there are at least the given number of tokens left. + * + * Has no effect in release builds. + */ + simdjson_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; + /** + * Assert that the given position addresses an actual token (is within bounds). + * + * Has no effect in release builds. + */ + simdjson_inline void assert_valid_position(token_position position) const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + /** + * Get a pointer to the current location in the input buffer. + * + * This is not null-terminated; it is a view into the JSON. + * + * You may be pointing outside of the input buffer: it is not generally + * safe to dereference this pointer. + */ + simdjson_inline const uint8_t *unsafe_pointer() const noexcept; + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token to retrieve. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ... 
+ */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the current root token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; + /** + * Get the JSON text for the last token in the document. + * + * This is not null-terminated; it is a view into the JSON. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ... + */ + simdjson_inline const uint8_t *peek_last() const noexcept; + + /** + * Ascend one level. + * + * Validates that the depth - 1 == parent_depth. + * + * @param parent_depth the expected parent depth. + */ + simdjson_inline void ascend_to(depth_t parent_depth) noexcept; + + /** + * Descend one level. + * + * Validates that the new depth == child_depth. + * + * @param child_depth the expected child depth. + */ + simdjson_inline void descend_to(depth_t child_depth) noexcept; + simdjson_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; + + /** + * Get current depth. + */ + simdjson_inline depth_t depth() const noexcept; + + /** + * Get current (writeable) location in the string buffer. + */ + simdjson_inline uint8_t *&string_buf_loc() noexcept; + + /** + * Report an unrecoverable error, preventing further iteration. + * + * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. 
+ */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + /** + * Log error, but don't stop iteration. + * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. + */ + simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; + + /** + * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with + * N bytes of capacity. It will return false if N is too small (smaller than max_len) or if it is zero. + * The buffer (tmpbuf) is padded with space characters. + */ + simdjson_warn_unused simdjson_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept; + + simdjson_inline token_position position() const noexcept; + /** + * Write the raw_json_string to the string buffer and return a string_view. + * Each raw_json_string should be unescaped once, or else the string buffer might + * overflow. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, bool allow_replacement) noexcept; + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in) noexcept; + + simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; + + simdjson_inline error_code consume_character(char c) noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + simdjson_inline token_position start_position(depth_t depth) const noexcept; + simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; +#endif + + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() const noexcept; + + /** + * Updates this json iterator so that it is back at the beginning of the document, + * as if it had just been created.
+ */ + inline void rewind() noexcept; + /** + * This checks whether the {,},[,] are balanced so that the document + * ends with proper zero depth. This requires scanning the whole document + * and it may be expensive. It is expected that it will be rarely called. + * It does not attempt to match { with } and [ with ]. + */ + inline bool balanced() const noexcept; +protected: + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser, bool streaming) noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + /// The last token before the end + simdjson_inline token_position last_position() const noexcept; + /// The token *at* the end. This points at gibberish and should only be used for comparison. + simdjson_inline token_position end_position() const noexcept; + /// The end of the buffer. + simdjson_inline token_position end() const noexcept; + + friend class document; + friend class document_stream; + friend class object; + friend class array; + friend class value; + friend class raw_json_string; + friend class parser; + friend class value_iterator; + friend class field; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... 
args) noexcept; +}; // json_iterator + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(haswell::ondemand::json_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H +/* end file simdjson/generic/ondemand/json_iterator.h for haswell */ +/* including simdjson/generic/ondemand/json_type.h for haswell: #include "simdjson/generic/ondemand/json_type.h" */ +/* begin file simdjson/generic/ondemand/json_type.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +/** + * The type of a JSON value. + */ +enum class json_type { + // Start at 1 to catch uninitialized / default values more easily + array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) + object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) + number, ///< A JSON number ( 1 or -2.3 or 4.5e6 ...) + string, ///< A JSON string ( "a" or "hello world\n" ...) + boolean, ///< A JSON boolean (true or false) + null ///< A JSON null (null) +}; + +/** + * A type representing a JSON number. 
+ * The design of the struct is deliberately straight-forward. All + * functions return standard values with no error check. + */ +struct number { + + /** + * return the automatically determined type of + * the number: number_type::floating_point_number, + * number_type::signed_integer or number_type::unsigned_integer. + * + * enum class number_type { + * floating_point_number=1, /// a binary64 number + * signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + * unsigned_integer /// a positive integer larger or equal to 1<<63 + * }; + */ + simdjson_inline ondemand::number_type get_number_type() const noexcept; + /** + * return true if the automatically determined type of + * the number is number_type::unsigned_integer. + */ + simdjson_inline bool is_uint64() const noexcept; + /** + * return the value as a uint64_t, only valid if is_uint64() is true. + */ + simdjson_inline uint64_t get_uint64() const noexcept; + simdjson_inline operator uint64_t() const noexcept; + + /** + * return true if the automatically determined type of + * the number is number_type::signed_integer. + */ + simdjson_inline bool is_int64() const noexcept; + /** + * return the value as a int64_t, only valid if is_int64() is true. + */ + simdjson_inline int64_t get_int64() const noexcept; + simdjson_inline operator int64_t() const noexcept; + + + /** + * return true if the automatically determined type of + * the number is number_type::floating_point_number. + */ + simdjson_inline bool is_double() const noexcept; + /** + * return the value as a double, only valid if is_double() is true. + */ + simdjson_inline double get_double() const noexcept; + simdjson_inline operator double() const noexcept; + + /** + * Convert the number to a double. Though it always succeed, the conversion + * may be lossy if the number cannot be represented exactly. 
+ */ + simdjson_inline double as_double() const noexcept; + + +protected: + /** + * The next block of declaration is designed so that we can call the number parsing + * functions on a number type. They are protected and should never be used outside + * of the core simdjson library. + */ + friend class value_iterator; + template + friend error_code numberparsing::write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer); + template + friend error_code numberparsing::parse_number(const uint8_t *const src, W &writer); + /** Store a signed 64-bit value to the number. */ + simdjson_inline void append_s64(int64_t value) noexcept; + /** Store an unsigned 64-bit value to the number. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + /** Store a double value to the number. */ + simdjson_inline void append_double(double value) noexcept; + /** Specifies that the value is a double, but leave it undefined. */ + simdjson_inline void skip_double() noexcept; + /** + * End of friend declarations. + */ + + /** + * Our attributes are a union type (size = 64 bits) + * followed by a type indicator. + */ + union { + double floating_point_number; + int64_t signed_integer; + uint64_t unsigned_integer; + } payload{0}; + number_type type{number_type::signed_integer}; +}; + +/** + * Write the JSON type to the output stream + * + * @param out The output stream. + * @param type The json_type. + */ +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept; + +#if SIMDJSON_EXCEPTIONS +/** + * Send JSON type to an output stream. + * + * @param out The output stream. + * @param type The json_type. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown). 
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false); +#endif + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(haswell::ondemand::json_type &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H +/* end file simdjson/generic/ondemand/json_type.h for haswell */ +/* including simdjson/generic/ondemand/raw_json_string.h for haswell: #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +/** + * A string escaped per JSON rules, terminated with quote ("). They are used to represent + * unescaped keys inside JSON documents. + * + * (In other words, a pointer to the beginning of a string, just after the start quote, inside a + * JSON file.) + * + * This class is deliberately simplistic and has little functionality. You can + * compare a raw_json_string instance with an unescaped C string, but + * that is nearly all you can do. 
+ * + * The raw_json_string is unescaped. If you wish to write an unescaped version of it to your own + * buffer, you may do so using the parser.unescape(string, buff) method, using an ondemand::parser + * instance. Doing so requires you to have a sufficiently large buffer. + * + * The raw_json_string instances originate typically from field instance which in turn represent + * key-value pairs from object instances. From a field instance, you get the raw_json_string + * instance by calling key(). You can, if you want a more usable string_view instance, call + * the unescaped_key() method on the field instance. You may also create a raw_json_string from + * any other string value, with the value.get_raw_json_string() method. Again, you can get + * a more usable string_view instance by calling get_string(). + * + */ +class raw_json_string { +public: + /** + * Create a new invalid raw_json_string. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline raw_json_string() noexcept = default; + + /** + * Create a new invalid raw_json_string pointed at the given location in the JSON. + * + * The given location must be just *after* the beginning quote (") in the JSON file. + * + * It *must* be terminated by a ", and be a valid JSON string. + */ + simdjson_inline raw_json_string(const uint8_t * _buf) noexcept; + /** + * Get the raw pointer to the beginning of the string in the JSON (just after the "). + * + * It is possible for this function to return a null pointer if the instance + * has outlived its existence. + */ + simdjson_inline const char * raw() const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done) on target.size() characters, + * and if the raw_json_string instance has a quote character at byte index target.size(). + * We never read more than length + 1 bytes in the raw_json_string instance. 
+ * If length is smaller than target.size(), this will return false. + * + * The std::string_view instance may contain any characters. However, the caller + * is responsible for setting length so that length bytes may be read in the + * raw_json_string. + * + * Performance: the comparison may be done using memcmp which may be efficient + * for long strings. + */ + simdjson_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The std::string_view instance should not contain unescaped quote characters: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * Performance: the comparison is done byte-by-byte which might be inefficient for + * long strings. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... + * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The provided C string should not contain an unescaped quote character: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... 
+ * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(const char* target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(const char* target) const noexcept; + + /** + * Returns true if target is free from unescaped quote. If target is known at + * compile-time, we might expect the computation to happen at compile time with + * many compilers (not all!). + */ + static simdjson_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; + static simdjson_inline bool is_free_from_unescaped_quote(const char* target) noexcept; + +private: + + + /** + * This will set the inner pointer to zero, effectively making + * this instance unusable. + */ + simdjson_inline void consume() noexcept { buf = nullptr; } + + /** + * Checks whether the inner pointer is non-null and thus usable. + */ + simdjson_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result will be a valid UTF-8. + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + * @param allow_replacement Whether we allow replacement of invalid surrogate pairs. 
+ */ + simdjson_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter, bool allow_replacement) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result may not be a valid UTF-8. https://simonsapin.github.io/wtf-8/ + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + */ + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(json_iterator &iter) const noexcept; + const uint8_t * buf{}; + friend class object; + friend class field; + friend class parser; + friend struct simdjson_result; +}; + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; + +/** + * Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible + * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings. 
+ */ +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept; +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept; + + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(haswell::ondemand::raw_json_string &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private + + simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape(haswell::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(haswell::ondemand::json_iterator &iter) const noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H +/* end file simdjson/generic/ondemand/raw_json_string.h for haswell */ +/* including simdjson/generic/ondemand/parser.h for haswell: #include "simdjson/generic/ondemand/parser.h" */ +/* begin file simdjson/generic/ondemand/parser.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace haswell { +namespace ondemand { + +/** + * The default batch size for document_stream instances for this On Demand kernel. + * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * in the future. + */ +static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; +/** + * Some adversary might try to set the batch size to 0 or 1, which might cause problems. + * We set a minimum of 32B since anything else is highly likely to be an error. In practice, + * most users will want a much larger batch size. + * + * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON + * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. + */ +static constexpr size_t MINIMAL_BATCH_SIZE = 32; + +/** + * A JSON fragment iterator. + * + * This holds the actual iterator as well as the buffer for writing strings. + */ +class parser { +public: + /** + * Create a JSON parser. + * + * The new parser will have zero capacity. + */ + inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; + + inline parser(parser &&other) noexcept = default; + simdjson_inline parser(const parser &other) = delete; + simdjson_inline parser &operator=(const parser &other) = delete; + simdjson_inline parser &operator=(parser &&other) noexcept = default; + + /** Deallocate the JSON parser. */ + inline ~parser() noexcept = default; + + /** + * Start iterating an on-demand JSON document. + * + * ondemand::parser parser; + * document doc = parser.iterate(json); + * + * It is expected that the content is a valid UTF-8 file, containing a valid JSON document. + * Otherwise the iterate method may return an error. 
In particular, the whole input should be + * valid: we do not attempt to tolerate incorrect content either before or after a JSON + * document. If there is a UTF-8 BOM, the parser skips it. + * + * ### IMPORTANT: Validate what you use + * + * Calling iterate on an invalid JSON document may not immediately trigger an error. The call to + * iterate does not parse and validate the whole document. + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call iterate() again or destroy the parser. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * @param len The length of the JSON. + * @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). + * + * @return The document, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if the parser does not have enough capacity and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document.
+ */ + simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_result iterate_allow_incomplete_json(padded_string_view json) & noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const uint8_t *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(padded_string &&json) & noexcept = delete; + + /** + * @private + * + * Start iterating an on-demand JSON document. 
+ * + * ondemand::parser parser; + * json_iterator doc = parser.iterate_raw(json); + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call iterate() again or destroy the parser. + * + * The ondemand::document instance holds the iterator. The document must remain in scope + * while you are accessing instances of ondemand::value, ondemand::object, ondemand::array. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * + * @return The iterator, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if the parser does not have enough capacity and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document. + */ + simdjson_warn_unused simdjson_result iterate_raw(padded_string_view json) & noexcept; + + + /** + * Parse a buffer containing many JSON documents.
+ * + * auto json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"_padded; + * ondemand::parser parser; + * ondemand::document_stream docs = parser.iterate_many(json); + * for (auto & doc : docs) { + * std::cout << doc["foo"] << std::endl; + * } + * // Prints 1 2 3 + * + * No copy of the input buffer is made. + * + * The function is lazy: it may be that no more than one JSON document at a time is parsed. + * + * The caller is responsible for ensuring that the input string data remains unchanged and is + * not deleted during the loop. + * + * ### Format + * + * The buffer must contain a series of one or more JSON documents, concatenated into a single + * buffer, separated by ASCII whitespace. It effectively parses until it has a fully valid document, + * then starts parsing the next document at that point. (It does this with more parallelism and + * lookahead than you might think, though.) + * + * Documents that consist of an object or array may omit the whitespace between them, concatenating + * with no separator. Documents that consist of a single primitive (i.e. documents that are not + * arrays or objects) MUST be separated with ASCII whitespace. + * + * The characters inside a JSON document, and between JSON documents, must be valid Unicode (UTF-8). + * If there is a UTF-8 BOM, the parser skips it. + * + * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. + * Setting batch_size to excessively large or excessively small values may negatively impact + * performance. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings.
+ * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * + * ### Parser Capacity + * + * If the parser's current capacity is less than batch_size, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param buf The concatenated JSON to parse. + * @param len The length of the concatenated JSON. + * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet + * spot is cache-related: small enough to fit in cache, yet big enough to + * parse as many documents as possible in one tight loop. + * Defaults to 1MB (DEFAULT_BATCH_SIZE), which has been a reasonable sweet spot in our tests. + * @param allow_comma_separated (defaults to false) This allows a mode where the documents are + * separated by commas instead of whitespace. It comes with a performance + * penalty because the entire document is indexed at once (and the document must be + * less than 4 GB), and there is no multithreading. In this mode, the batch_size parameter + * is effectively ignored, as it is set to at least the document size. + * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: + * - MEMALLOC if the parser does not have enough capacity and memory allocation fails + * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. + * - other json errors if parsing fails. You should not rely on these errors to always be the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
+ */ + inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + + /** @private We do not want to allow implicit conversion from C string to std::string. */ + simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; + + /** The capacity of this parser (the largest document it can process). */ + simdjson_inline size_t capacity() const noexcept; + /** The maximum capacity of this parser (the largest document it is allowed to process). */ + simdjson_inline size_t max_capacity() const noexcept; + simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; + /** + * The maximum depth of this parser (the most deeply nested objects and arrays it can process). + * This parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. 
+ */ + simdjson_inline size_t max_depth() const noexcept; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * The max_depth parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return The error, if there is one. + */ + simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept; + + #ifdef SIMDJSON_THREADS_ENABLED + /** + * The parser instance can use threads when they are available to speed up some + * operations. It is enabled by default. Changing this attribute will change the + * behavior of the parser for future operations. + */ + bool threaded{true}; + #endif + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result must be valid UTF-8. + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. 
+ * @param allow_replacement Whether we allow a replacement if the input string contains unmatched surrogate pairs. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement = false) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result may not be valid UTF-8. See https://simonsapin.github.io/wtf-8/ + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. 
+ */ + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; + +private: + /** @private [for benchmarking access] The implementation to use */ + std::unique_ptr implementation{}; + size_t _capacity{0}; + size_t _max_capacity; + size_t _max_depth{DEFAULT_MAX_DEPTH}; + std::unique_ptr string_buf{}; +#if SIMDJSON_DEVELOPMENT_CHECKS + std::unique_ptr start_positions{}; +#endif + + friend class json_iterator; + friend class document_stream; +}; + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(haswell::ondemand::parser &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H +/* end file simdjson/generic/ondemand/parser.h for haswell */ + +// All other declarations +/* including simdjson/generic/ondemand/array.h for haswell: #include "simdjson/generic/ondemand/array.h" */ +/* begin file simdjson/generic/ondemand/array.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +/** + * A forward-only JSON array. 
+ */ +class array { +public: + /** + * Create a new invalid array. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline array() noexcept = default; + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is rewound to its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an array is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the beginning of the array and checks whether the + * array is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the array is rewound to its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result is_empty() & noexcept; + /** + * Reset the iterator so that we are pointing back at the + * beginning of the array. You should still consume values only once even if you + * can iterate through the array more than once. If you unescape a string + * within the array more than once, you have unsafe code.
Note that rewinding + * an array means that you may need to reparse it anew: it is not a free + * operation. + * + * @returns true if the array contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/0/foo/a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an array + * instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression.
We only support + * JSONPath queries that are trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Consumes the array and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + + /** + * Get the value at the given index. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; +protected: + /** + * Go to the end of the array, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + + /** + * Begin array iteration. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + */ + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + /** + * Begin array iteration from the root. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + * @error TAPE_ERROR if there is no closing ] at the end of the document.
+ */ + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + /** + * Begin array iteration. + * + * This version of the method should be called after the initial [ has been verified, and is + * intended for use by switch statements that check the type of a value. + * + * @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array. + */ + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + + /** + * Internal array creation from the given iterator. Call array::start() or array::started() instead of this. + * + * @param iter The iterator. Must either be at the start of the first element with iter.is_alive() + * == true, or past the [] with is_alive() == false if the array is empty. Will be *moved* + * into the resulting array. + */ + simdjson_inline array(const value_iterator &iter) noexcept; + + /** + * Iterator marking current position. + * + * iter.is_alive() == false indicates iteration is complete.
+ */ + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; + friend struct simdjson_result; + friend class array_iterator; +}; + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(haswell::ondemand::array &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + inline simdjson_result count_elements() & noexcept; + inline simdjson_result is_empty() & noexcept; + inline simdjson_result reset() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_H +/* end file simdjson/generic/ondemand/array.h for haswell */ +/* including simdjson/generic/ondemand/array_iterator.h for haswell: #include "simdjson/generic/ondemand/array_iterator.h" */ +/* begin file simdjson/generic/ondemand/array_iterator.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h"
*/ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace haswell { +namespace ondemand { + +/** + * A forward-only JSON array iterator. + * + * This is an input_iterator, meaning: + * - It is forward-only + * - * must be called exactly once per element. + * - ++ must be called exactly once in between each * (*, ++, *, ++, * ...) + */ +class array_iterator { +public: + /** Create a new, invalid array iterator. */ + simdjson_inline array_iterator() noexcept = default; + + // + // Iterator interface + // + + /** + * Get the current element. + * + * Part of the std::iterator interface. + */ + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + /** + * Check if we are at the end of the JSON array. + * + * Part of the std::iterator interface. + * + * @return true if there are no more elements in the JSON array. + */ + simdjson_inline bool operator==(const array_iterator &) const noexcept; + /** + * Check if there are more elements in the JSON array. + * + * Part of the std::iterator interface. + * + * @return true if there are more elements in the JSON array. + */ + simdjson_inline bool operator!=(const array_iterator &) const noexcept; + /** + * Move to the next element. + * + * Part of the std::iterator interface.
+ */ + simdjson_inline array_iterator &operator++() noexcept; + +private: + value_iterator iter{}; + + simdjson_inline array_iterator(const value_iterator &iter) noexcept; + + friend class array; + friend class value; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(haswell::ondemand::array_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H +/* end file simdjson/generic/ondemand/array_iterator.h for haswell */ +/* including simdjson/generic/ondemand/document.h for haswell: #include "simdjson/generic/ondemand/document.h" */ +/* begin file simdjson/generic/ondemand/document.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +/** + * A JSON document. It holds a json_iterator instance.
+ * + * Used by tokens to get text, and string buffer location. + * + * You must keep the document around during iteration. + */ +class document { +public: + /** + * Create a new invalid document. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline document() noexcept = default; + simdjson_inline document(const document &other) noexcept = delete; // pass your documents by reference, not by copy + simdjson_inline document(document &&other) noexcept = default; + simdjson_inline document &operator=(const document &other) noexcept = delete; + simdjson_inline document &operator=(document &&other) noexcept = default; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() & noexcept; + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() & noexcept; + /** + * Cast this JSON value to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + /** + * Cast this JSON value (inside string) to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64() noexcept; + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer.
+ * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: Calling get_string() twice on the same document is an error. + * + * @param Whether to allow a replacement character for unmatched surrogate pairs. + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + /** + * Attempts to fill the provided std::string reference with the parsed value of the current string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. + * We recommend you avoid allocating an std::string unless you need to. + * + * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. + */ + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + /** + * Cast this JSON value to a string. + * + * The string is not guaranteed to be valid UTF-8. 
See https://simonsapin.github.io/wtf-8/ + * + * Important: Calling get_wobbly_string() twice on the same document is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_inline simdjson_result get_bool() noexcept; + /** + * Cast this JSON value to a value when the document is an object or an array. + * + * You must not have begun iterating through the object or array. When + * SIMDJSON_DEVELOPMENT_CHECKS is set to 1 (which is the case when building in Debug mode + * by default), and you have already begun iterating, + * you will get an OUT_OF_ORDER_ITERATION error. If you have begun iterating, you can use + * rewind() to reset the document to its initial state before calling this method. + * + * @returns A value if the document is a JSON array or object. + * @returns SCALAR_DOCUMENT_AS_VALUE error if the document is a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result get_value() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null.
+ * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_inline simdjson_result get() & noexcept { + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + } + /** @overload template simdjson_result get() & noexcept */ + template simdjson_inline simdjson_result get() && noexcept { + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." 
+ " You may also add support for custom types, see our documentation."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value + * + * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_inline error_code get(T &out) & noexcept; + /** @overload template error_code get(T &out) & noexcept */ + template simdjson_inline error_code get(T &out) && noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.) + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() noexcept(false); + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() & noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() & noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A signed 64-bit integer. 
+ * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); + /** + * Cast this JSON value to a value when the document is an object or an array. + * + * You must not have begun iterating through the object or array. When + * SIMDJSON_DEVELOPMENT_CHECKS is defined, and you have already begun iterating, + * you will get an OUT_OF_ORDER_ITERATION error.
If you have begun iterating, you can use + * rewind() to reset the document to its initial state before calling this method. + * + * @returns A value if the document is a JSON array or object. + * @exception SCALAR_DOCUMENT_AS_VALUE error if the document is a scalar (see is_scalar() function). + */ + simdjson_inline operator value() noexcept(false); +#endif + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is rewound to its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is rewound to its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call.
+ * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) & noexcept; + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to + * a key a single time. Doing object["mykey"].to_string()and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
+ */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field was not there when they are not in order). + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. 
+ * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the document is a scalar (string, number, null, Boolean). + * Returns false when it is an array or object. + * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + + /** + * Checks whether the document is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; + + /** + * Checks whether the document is a negative number.
+ * + * @returns true if the number is negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the document is an integer number. Note that + * this requires to partially parse the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * @returns true if the number is an integer. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808 and no larger than 18446744073709551615. + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808 and greater or equal to -9223372036854775808. + * get_number_type() is number_type::big_integer if we have an integer outside + * of those ranges (either larger than 18446744073709551615 or smaller than -9223372036854775808). + * Otherwise, get_number_type() has value number_type::floating_point_number + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number.
An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. If this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view may be the padded buffer. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". 
+ * - -1.2e-100 + * - true + * - false + * - null + */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + /** + * Reset the iterator inside the document instance so we are pointing back at the + * beginning of the document, as if it had just been created. It invalidates all + * values, objects and arrays that you have created so far (including unescaped strings). + */ + inline void rewind() noexcept; + /** + * Returns debugging information. + */ + inline std::string to_debug_string() noexcept; + /** + * Some unrecoverable error conditions may render the document instance unusable. + * The is_alive() method returns true when the document is still suitable. + */ + inline bool is_alive() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() const noexcept; + + /** + * Returns true if this document has been fully parsed. + * If you have consumed the whole document and at_end() returns + * false, then there may be trailing content. + */ + inline bool at_end() const noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. 
+ * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/\\u00E9") == 123 + * doc.at_pointer((const char*)u8"/\u00E9") returns an error (NO_SUCH_FIELD) + * + * Note that at_pointer() automatically calls rewind between each call. Thus + * all values, objects and arrays that you have created so far (including unescaped strings) + * are invalidated. After calling at_pointer, you need to consume the result: string values + * should be stored in your own variables, arrays should be decoded and stored in your own array-like + * structures and so forth. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. 
We only support + * JSONPath queries that are trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_path(".\\u00E9") == 123 + * doc.at_path((const char*)u8".\u00E9") returns an error (NO_SUCH_FIELD) + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Consumes the document and returns a string_view instance corresponding to the + * document as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; +protected: + /** + * Consumes the document.
+ */ + simdjson_inline error_code consume() noexcept; + + simdjson_inline document(ondemand::json_iterator &&iter) noexcept; + simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; + + simdjson_inline value_iterator resume_value_iterator() noexcept; + simdjson_inline value_iterator get_root_value_iterator() noexcept; + simdjson_inline simdjson_result start_or_resume_object() noexcept; + static simdjson_inline document start(ondemand::json_iterator &&iter) noexcept; + + // + // Fields + // + json_iterator iter{}; ///< Current position in the document + static constexpr depth_t DOCUMENT_DEPTH = 0; ///< document depth is always 0 + + friend class array_iterator; + friend class value; + friend class ondemand::parser; + friend class object; + friend class array; + friend class field; + friend class token; + friend class document_stream; + friend class document_reference; +}; + + +/** + * A document_reference is a thin wrapper around a document reference instance. + */ +class document_reference { +public: + simdjson_inline document_reference() noexcept; + simdjson_inline document_reference(document &d) noexcept; + simdjson_inline document_reference(const document_reference &other) noexcept = default; + simdjson_inline document_reference& operator=(const document_reference &other) noexcept = default; + simdjson_inline void rewind() noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code 
get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + + simdjson_inline simdjson_result is_null() noexcept; + template simdjson_inline simdjson_result get() & noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + simdjson_inline operator document&() const noexcept; +#if SIMDJSON_EXCEPTIONS + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator array() & noexcept(false); + simdjson_inline operator object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + + simdjson_inline simdjson_result type() noexcept; + simdjson_inline 
simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + +private: + document *doc{nullptr}; +}; +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(haswell::ondemand::document &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() 
noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() & noexcept; + template simdjson_inline simdjson_result get() && noexcept; + + template simdjson_inline error_code get(T &out) & noexcept; + template simdjson_inline error_code get(T &out) && noexcept; +#if SIMDJSON_EXCEPTIONS + template ::value == false>::type> + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator haswell::ondemand::array() & noexcept(false); + simdjson_inline operator haswell::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator haswell::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator haswell::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() 
noexcept; + simdjson_inline simdjson_result is_string() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool at_end() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + + +} // namespace simdjson + + + +namespace simdjson { + +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(haswell::ondemand::document_reference value, error_code error) noexcept; + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline 
simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; +#if SIMDJSON_EXCEPTIONS + template ::value == false>::type> + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator haswell::ondemand::array() & noexcept(false); + simdjson_inline operator haswell::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator haswell::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator haswell::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline 
simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H +/* end file simdjson/generic/ondemand/document.h for haswell */ +/* including simdjson/generic/ondemand/document_stream.h for haswell: #include "simdjson/generic/ondemand/document_stream.h" */ +/* begin file simdjson/generic/ondemand/document_stream.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#ifdef SIMDJSON_THREADS_ENABLED +#include +#include +#include +#endif + +namespace simdjson { +namespace haswell { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED +/** @private Custom worker class **/ +struct stage1_worker { + stage1_worker() noexcept = default; + stage1_worker(const stage1_worker&) = delete; + stage1_worker(stage1_worker&&) = delete; + stage1_worker operator=(const stage1_worker&) = delete; + ~stage1_worker(); + /** + * We only start 
the thread when it is needed, not at object construction, this may throw. + * You should only call this once. + **/ + void start_thread(); + /** + * Start a stage 1 job. You should first call 'run', then 'finish'. + * You must call start_thread once before. + */ + void run(document_stream * ds, parser * stage1, size_t next_batch_start); + /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ + void finish(); + +private: + + /** + * Normally, we would never stop the thread. But we do in the destructor. + * This function is only safe assuming that you are not waiting for results. You + * should have called run, then finish, and be done. + **/ + void stop_thread(); + + std::thread thread{}; + /** These three variables define the work done by the thread. **/ + ondemand::parser * stage1_thread_parser{}; + size_t _next_batch_start{}; + document_stream * owner{}; + /** + * We have two state variables. This could be streamlined to one variable in the future but + * we use two for clarity. + */ + bool has_work{false}; + bool can_work{true}; + + /** + * We lock using a mutex. + */ + std::mutex locking_mutex{}; + std::condition_variable cond_var{}; + + friend class document_stream; +}; +#endif // SIMDJSON_THREADS_ENABLED + +/** + * A forward-only stream of documents. + * + * Produced by parser::iterate_many. + * + */ +class document_stream { +public: + /** + * Construct an uninitialized document_stream. + * + * ```c++ + * document_stream docs; + * auto error = parser.iterate_many(json).get(docs); + * ``` + */ + simdjson_inline document_stream() noexcept; + /** Move one document_stream to another. */ + simdjson_inline document_stream(document_stream &&other) noexcept = default; + /** Move one document_stream to another. */ + simdjson_inline document_stream &operator=(document_stream &&other) noexcept = default; + + simdjson_inline ~document_stream() noexcept; + + /** + * Returns the input size in bytes. 
+ */ + inline size_t size_in_bytes() const noexcept; + + /** + * After iterating through the stream, this method + * returns the number of bytes that were not parsed at the end + * of the stream. If truncated_bytes() differs from zero, + * then the input was truncated maybe because incomplete JSON + * documents were found at the end of the stream. You + * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). + * + * You should only call truncated_bytes() after streaming through all + * documents, like so: + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto & doc : stream) { + * // do something with doc + * } + * size_t truncated = stream.truncated_bytes(); + * + */ + inline size_t truncated_bytes() const noexcept; + + class iterator { + public: + using value_type = simdjson_result; + using reference = simdjson_result; + using pointer = void; + using difference_type = std::ptrdiff_t; + using iterator_category = std::input_iterator_tag; + + /** + * Default constructor. + */ + simdjson_inline iterator() noexcept; + /** + * Get the current document (or error). + */ + simdjson_inline reference operator*() noexcept; + /** + * Advance to the next document (prefix). + */ + inline iterator& operator++() noexcept; + /** + * Check if we're at the end yet. + * @param other the end iterator to compare to. + */ + simdjson_inline bool operator!=(const iterator &other) const noexcept; + /** + * @private + * + * Gives the current index in the input document in bytes. + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * auto doc = *i; + * size_t index = i.current_index(); + * } + * + * This function (current_index()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier.
+ */ + simdjson_inline size_t current_index() const noexcept; + + /** + * @private + * + * Gives a view of the current document at the current position. + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * std::string_view v = i.source(); + * } + * + * The returned string_view instance is simply a map to the (unparsed) + * source string: it may thus include white-space characters and all manner + * of padding. + * + * This function (source()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + * + */ + simdjson_inline std::string_view source() const noexcept; + + /** + * Returns error of the stream (if any). + */ + inline error_code error() const noexcept; + + private: + simdjson_inline iterator(document_stream *s, bool finished) noexcept; + /** The document_stream we're iterating through. */ + document_stream* stream; + /** Whether we're finished or not. */ + bool finished; + + friend class document; + friend class document_stream; + friend class json_iterator; + }; + + /** + * Start iterating the documents in the stream. + */ + simdjson_inline iterator begin() noexcept; + /** + * The end of the stream, for iterator comparison purposes. + */ + simdjson_inline iterator end() noexcept; + +private: + + document_stream &operator=(const document_stream &) = delete; // Disallow copying + document_stream(const document_stream &other) = delete; // Disallow copying + + /** + * Construct a document_stream. Does not allocate or parse anything until the iterator is + * used. 
+ * + * @param parser is a reference to the parser instance used to generate this document_stream + * @param buf is the raw byte buffer we need to process + * @param len is the length of the raw byte buffer in bytes + * @param batch_size is the size of the windows (must be strictly greater or equal to the largest JSON document) + */ + simdjson_inline document_stream( + ondemand::parser &parser, + const uint8_t *buf, + size_t len, + size_t batch_size, + bool allow_comma_separated + ) noexcept; + + /** + * Parse the first document in the buffer. Used by begin(), to handle allocation and + * initialization. + */ + inline void start() noexcept; + + /** + * Parse the next document found in the buffer previously given to document_stream. + * + * The content should be a valid JSON document encoded as UTF-8. If there is a + * UTF-8 BOM, the parser skips it. + * + * You do NOT need to pre-allocate a parser. This function takes care of + * pre-allocating a capacity defined by the batch_size defined when creating the + * document_stream object. + * + * The function returns simdjson::EMPTY if there is no more data to be parsed. + * + * The function returns simdjson::SUCCESS (as integer = 0) in case of success + * and indicates that the buffer has successfully been parsed to the end. + * Every document it contained has been parsed without error. + * + * The function returns an error code from simdjson/simdjson.h in case of failure + * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth; + * the simdjson::error_message function converts these error codes into a string). + * + * You can also check validity by calling parser.is_valid(). The same parser can + * and should be reused for the other documents in the buffer. + */ + inline void next() noexcept; + + /** Move the json_iterator of the document to the location of the next document in the stream. */ + inline void next_document() noexcept; + + /** Get the next document index. 
*/ + inline size_t next_batch_start() const noexcept; + + /** Pass the next batch through stage 1 with the given parser. */ + inline error_code run_stage1(ondemand::parser &p, size_t batch_start) noexcept; + + // Fields + ondemand::parser *parser; + const uint8_t *buf; + size_t len; + size_t batch_size; + bool allow_comma_separated; + /** + * We are going to use just one document instance. The document owns + * the json_iterator. It implies that we only ever pass a reference + * to the document to the users. + */ + document doc{}; + /** The error (or lack thereof) from the current document. */ + error_code error; + size_t batch_start{0}; + size_t doc_index{}; + + #ifdef SIMDJSON_THREADS_ENABLED + /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ + bool use_thread; + + inline void load_from_stage1_thread() noexcept; + + /** Start a thread to run stage 1 on the next batch. */ + inline void start_stage1_thread() noexcept; + + /** Wait for the stage 1 thread to finish and capture the results. */ + inline void finish_stage1_thread() noexcept; + + /** The error returned from the stage 1 thread. */ + error_code stage1_thread_error{UNINITIALIZED}; + /** The thread used to run stage 1 against the next batch in the background. */ + std::unique_ptr worker{new(std::nothrow) stage1_worker()}; + /** + * The parser used to run stage 1 in the background. Will be swapped + * with the regular parser when finished. 
+ */ + ondemand::parser stage1_thread_parser{}; + + friend struct stage1_worker; + #endif // SIMDJSON_THREADS_ENABLED + + friend class parser; + friend class document; + friend class json_iterator; + friend struct simdjson_result; + friend struct internal::simdjson_result_base; +}; // document_stream + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(haswell::ondemand::document_stream &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H +/* end file simdjson/generic/ondemand/document_stream.h for haswell */ +/* including simdjson/generic/ondemand/field.h for haswell: #include "simdjson/generic/ondemand/field.h" */ +/* begin file simdjson/generic/ondemand/field.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +/** + * A JSON field (key/value pair) in an object. + * + * Returned from object iteration. + * + * Extends from std::pair so you can use C++ algorithms that rely on pairs. 
+ */ +class field : public std::pair { +public: + /** + * Create a new invalid field. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline field() noexcept; + + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + /** + * Get the key as a raw_json_string. Can be used for direct comparison with + * an unescaped C string: e.g., key() == "test". + */ + simdjson_inline raw_json_string key() const noexcept; + /** + * Get the unprocessed key as a string_view. This includes the quotes and may include + * some spaces after the last quote. + */ + simdjson_inline std::string_view key_raw_json_token() const noexcept; + /** + * Get the key as a string_view. This does not include the quotes and + * the string is unprocessed key so it may contain escape characters + * (e.g., \uXXXX or \n). Use unescaped_key() to get the unescaped key. + */ + simdjson_inline std::string_view escaped_key() const noexcept; + /** + * Get the field value. 
+ */ + simdjson_inline ondemand::value &value() & noexcept; + /** + * @overload ondemand::value &ondemand::value() & noexcept + */ + simdjson_inline ondemand::value value() && noexcept; + +protected: + simdjson_inline field(raw_json_string key, ondemand::value &&value) noexcept; + static simdjson_inline simdjson_result start(value_iterator &parent_iter) noexcept; + static simdjson_inline simdjson_result start(const value_iterator &parent_iter, raw_json_string key) noexcept; + friend struct simdjson_result; + friend class object_iterator; +}; + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(haswell::ondemand::field &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result key() noexcept; + simdjson_inline simdjson_result key_raw_json_token() noexcept; + simdjson_inline simdjson_result escaped_key() noexcept; + simdjson_inline simdjson_result value() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_H +/* end file simdjson/generic/ondemand/field.h for haswell */ +/* including simdjson/generic/ondemand/object.h for haswell: #include "simdjson/generic/ondemand/object.h" */ +/* begin file simdjson/generic/ondemand/object.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +/** + * A forward-only JSON object field iterator. + */ +class object { +public: + /** + * Create a new invalid object. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline object() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. 
+ * + * You are expected to access keys only once. You should access the value corresponding to a + * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. + * + * If you expect to have keys with escape characters, please review our documentation. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field was not there when they are not in order). + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. 
The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. + * + * If you expect to have keys with escape characters, please review our documentation. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. 
+ * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an object + * instance: there is no rewind and no invalidation. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. 
+ * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Reset the iterator so that we are pointing back at the + * beginning of the object. You should still consume values only once even if you + * can iterate through the object more than once. If you unescape a string within + * the object more than once, you have unsafe code. Note that rewinding an object + * means that you may need to reparse it anew: it is not a free operation. + * + * @returns true if the object contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * This method scans the beginning of the object and checks whether the + * object is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + inline simdjson_result is_empty() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. 
+ * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Consumes the object and returns a string_view instance corresponding to the + * object as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + +protected: + /** + * Go to the end of the object, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + static simdjson_inline object resume(const value_iterator &iter) noexcept; + simdjson_inline object(const value_iterator &iter) noexcept; + + simdjson_warn_unused simdjson_inline error_code find_field_raw(const std::string_view key) noexcept; + + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(haswell::ondemand::object &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + 
simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + + inline simdjson_result reset() noexcept; + inline simdjson_result is_empty() noexcept; + inline simdjson_result count_fields() & noexcept; + inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_H +/* end file simdjson/generic/ondemand/object.h for haswell */ +/* including simdjson/generic/ondemand/object_iterator.h for haswell: #include "simdjson/generic/ondemand/object_iterator.h" */ +/* begin file simdjson/generic/ondemand/object_iterator.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +class object_iterator { +public: + /** + * Create a new invalid object_iterator. + * + * Exists so you can declare a variable and later assign to it before use. 
+ */ + simdjson_inline object_iterator() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result operator*() noexcept; + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const object_iterator &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. + simdjson_inline bool operator!=(const object_iterator &) const noexcept; + // Checks for ']' and ',' + simdjson_inline object_iterator &operator++() noexcept; + +private: + /** + * The underlying JSON iterator. + * + * PERF NOTE: expected to be elided in favor of the parent document: this is set when the object + * is first used, and never changes afterwards. + */ + value_iterator iter{}; + + simdjson_inline object_iterator(const value_iterator &iter) noexcept; + friend struct simdjson_result; + friend class object; +}; + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(haswell::ondemand::object_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. 
+ simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + // Checks for ']' and ',' + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H +/* end file simdjson/generic/ondemand/object_iterator.h for haswell */ +/* including simdjson/generic/ondemand/serialization.h for haswell: #include "simdjson/generic/ondemand/serialization.h" */ +/* begin file simdjson/generic/ondemand/serialization.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Create a string-view instance out of a document instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(haswell::ondemand::document& x) noexcept; +/** + * Create a string-view instance out of a value instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. The value must + * not have been accessed previously. It does not + * validate the content. + */ +inline simdjson_result to_json_string(haswell::ondemand::value& x) noexcept; +/** + * Create a string-view instance out of an object instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(haswell::ondemand::object& x) noexcept; +/** + * Create a string-view instance out of an array instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. 
It does not + * validate the content. + */ +inline simdjson_result to_json_string(haswell::ondemand::array& x) noexcept; +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +} // namespace simdjson + +/** + * We want to support argument-dependent lookup (ADL). + * Hence we should define operator<< in the namespace + * where the argument (here value, object, etc.) resides. + * Credit: @madhur4127 + * See https://github.com/simdjson/simdjson/issues/1768 + */ +namespace simdjson { namespace haswell { namespace ondemand { + +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The element. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::value x); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::array value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. 
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::document& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::document_reference& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The object. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::object value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +}}} // namespace simdjson::haswell::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H +/* end file simdjson/generic/ondemand/serialization.h for haswell */ + +// Inline definitions +/* including simdjson/generic/ondemand/array-inl.h for haswell: #include "simdjson/generic/ondemand/array-inl.h" */ +/* begin file simdjson/generic/ondemand/array-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped 
(editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the array is first found and the iterator is just past the `{`. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the `,` before the next value (or `]`). In this state, +// depth == iter->depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter->depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the array iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an +// array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter->depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter->depth == depth, and at_start == false. +// +// ## Terminal State +// +// The terminal state has iter->depth < depth. 
at_start is always false. +// +// - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this +// by decrementing depth. In this state, iter->depth < depth, at_start == false, and +// error == SUCCESS. +// + +simdjson_inline array::array(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { + // We don't need to know if the array is empty to start iteration, but we do want to know if there + // is an error--thus `simdjson_unused`. + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_root_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { + bool has_value; + SIMDJSON_TRY(iter.started_array().get(has_value)); + return array(iter); +} + +simdjson_inline simdjson_result array::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return array_iterator(iter); +} +simdjson_inline simdjson_result array::end() noexcept { + return array_iterator(iter); +} +simdjson_inline error_code array::consume() noexcept { + auto error = iter.json_iter().skip_child(iter.depth()-1); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result array::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. 
+ const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline simdjson_result array::count_elements() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the array after counting the number of elements. + iter.reset_array(); + return count; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline simdjson_result array::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_array().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +inline simdjson_result array::reset() & noexcept { + return iter.reset_array(); +} + +inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + // - means "the append position" or "the element after the end of the array" + // We don't support this, because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. 
If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + // Get the child + auto child = at(array_index); + // If there is an error, it ends here + if(child.error()) { + return child; + } + + // If there is a /, we're not done yet, call recursively. + if (i < json_pointer.length()) { + child = child.at_pointer(json_pointer.substr(i)); + } + return child; +} + +inline std::string json_path_to_pointer_conversion(std::string_view json_path) { + if (json_path.empty() || (json_path.front() != '.' && + json_path.front() != '[')) { + return "-1"; // This is just a sentinel value, the caller should check for this and return an error. + } + + std::string result; + // Reserve space to reduce allocations, adjusting for potential increases due + // to escaping. + result.reserve(json_path.size() * 2); + + size_t i = 0; + + while (i < json_path.length()) { + if (json_path[i] == '.') { + result += '/'; + } else if (json_path[i] == '[') { + result += '/'; + ++i; // Move past the '[' + while (i < json_path.length() && json_path[i] != ']') { + if (json_path[i] == '~') { + result += "~0"; + } else if (json_path[i] == '/') { + result += "~1"; + } else { + result += json_path[i]; + } + ++i; + } + if (i == json_path.length() || json_path[i] != ']') { + return "-1"; // Using sentinel value that will be handled as an error by the caller. 
+ } + } else { + if (json_path[i] == '~') { + result += "~0"; + } else if (json_path[i] == '/') { + result += "~1"; + } else { + result += json_path[i]; + } + } + ++i; + } + + return result; +} + +inline simdjson_result array::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { return INVALID_JSON_POINTER; } + return at_pointer(json_pointer); +} + +simdjson_inline simdjson_result array::at(size_t index) noexcept { + size_t i = 0; + for (auto value : *this) { + if (i == index) { return value; } + i++; + } + return INDEX_OUT_OF_BOUNDS; +} + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + haswell::ondemand::array &&value +) noexcept + : implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept + : implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { + if (error()) { return error(); } + return first.is_empty(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view 
json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H +/* end file simdjson/generic/ondemand/array-inl.h for haswell */ +/* including simdjson/generic/ondemand/array_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/array_iterator-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result array_iterator::operator*() noexcept { + if (iter.error()) { iter.abandon(); return iter.error(); } + return value(iter.child()); +} +simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { + return iter.is_open(); +} +simdjson_inline array_iterator &array_iterator::operator++() noexcept { + error_code error; + // PERF NOTE this is a safety rail 
... users should exit loops as soon as they receive an error, so we'll never get here. + // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. + if (( error = iter.error() )) { return *this; } + if (( error = iter.skip_child() )) { return *this; } + if (( error = iter.has_next_element().error() )) { return *this; } + return *this; +} + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + haswell::ondemand::array_iterator &&value +) noexcept + : haswell::implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : haswell::implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++(first); + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/array_iterator-inl.h for haswell */ +/* including simdjson/generic/ondemand/document-inl.h for haswell: #include "simdjson/generic/ondemand/document-inl.h" */ +/* begin file simdjson/generic/ondemand/document-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H + +/* amalgamation skipped 
(editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept + : iter{std::forward(_iter)} +{ + logger::log_start_value(iter, "document"); +} + +simdjson_inline document document::start(json_iterator &&iter) noexcept { + return document(std::forward(iter)); +} + +inline void document::rewind() noexcept { + iter.rewind(); +} + +inline std::string document::to_debug_string() noexcept { + return iter.to_string(); +} + +inline simdjson_result document::current_location() const noexcept { + return 
iter.current_location(); +} + +inline int32_t document::current_depth() const noexcept { + return iter.depth(); +} + +inline bool document::at_end() const noexcept { + return iter.at_end(); +} + + +inline bool document::is_alive() noexcept { + return iter.is_alive(); +} +simdjson_inline value_iterator document::resume_value_iterator() noexcept { + return value_iterator(&iter, 1, iter.root_position()); +} +simdjson_inline value_iterator document::get_root_value_iterator() noexcept { + return resume_value_iterator(); +} +simdjson_inline simdjson_result document::start_or_resume_object() noexcept { + if (iter.at_root()) { + return get_object(); + } else { + return object::resume(resume_value_iterator()); + } +} +simdjson_inline simdjson_result document::get_value() noexcept { + // Make sure we start any arrays or objects before returning, so that start_root_() + // gets called. + + // It is the convention throughout the code that the macro `SIMDJSON_DEVELOPMENT_CHECKS` determines whether + // we check for OUT_OF_ORDER_ITERATION. Proper on::demand code should never trigger this error. +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.at_root()) { return OUT_OF_ORDER_ITERATION; } +#endif + // assert_at_root() serves two purposes: in Debug mode, whether or not + // SIMDJSON_DEVELOPMENT_CHECKS is set or not, it checks that we are at the root of + // the document (this will typically be redundant). In release mode, it generates + // SIMDJSON_ASSUME statements to allow the compiler to make assumptions. + iter.assert_at_root(); + switch (*iter.peek()) { + case '[': { + // The following lines check that the document ends with ]. + auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_array(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + case '{': { + // The following lines would check that the document ends with }. 
+ auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_object(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + default: + // Unfortunately, scalar documents are a special case in simdjson and they cannot + // be safely converted to value instances. + return SCALAR_DOCUMENT_AS_VALUE; + } +} +simdjson_inline simdjson_result document::get_array() & noexcept { + auto value = get_root_value_iterator(); + return array::start_root(value); +} +simdjson_inline simdjson_result document::get_object() & noexcept { + auto value = get_root_value_iterator(); + return object::start_root(value); +} + +/** + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. We want to disallow trailing + * content. + * Thus, in several implementations below, we pass a 'true' parameter value to + * a get_root_value_iterator() method: this indicates that we disallow trailing content. 
+ */ + +simdjson_inline simdjson_result document::get_uint64() noexcept { + return get_root_value_iterator().get_root_uint64(true); +} +simdjson_inline simdjson_result document::get_uint64_in_string() noexcept { + return get_root_value_iterator().get_root_uint64_in_string(true); +} +simdjson_inline simdjson_result document::get_int64() noexcept { + return get_root_value_iterator().get_root_int64(true); +} +simdjson_inline simdjson_result document::get_int64_in_string() noexcept { + return get_root_value_iterator().get_root_int64_in_string(true); +} +simdjson_inline simdjson_result document::get_double() noexcept { + return get_root_value_iterator().get_root_double(true); +} +simdjson_inline simdjson_result document::get_double_in_string() noexcept { + return get_root_value_iterator().get_root_double_in_string(true); +} +simdjson_inline simdjson_result document::get_string(bool allow_replacement) noexcept { + return get_root_value_iterator().get_root_string(true, allow_replacement); +} +template +simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { + return get_root_value_iterator().get_root_string(receiver, true, allow_replacement); +} +simdjson_inline simdjson_result document::get_wobbly_string() noexcept { + return get_root_value_iterator().get_root_wobbly_string(true); +} +simdjson_inline simdjson_result document::get_raw_json_string() noexcept { + return get_root_value_iterator().get_root_raw_json_string(true); +} +simdjson_inline simdjson_result document::get_bool() noexcept { + return get_root_value_iterator().get_root_bool(true); +} +simdjson_inline simdjson_result document::is_null() noexcept { + return get_root_value_iterator().is_root_null(true); +} + +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document::get() & 
noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } + +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } + +template simdjson_inline error_code document::get(T &out) & noexcept { + return get().get(out); +} +template simdjson_inline error_code document::get(T &out) && noexcept { + return std::forward(*this).get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline document::operator T() noexcept(false) { return get(); } +simdjson_inline document::operator array() & noexcept(false) { return get_array(); } +simdjson_inline document::operator object() & noexcept(false) { return get_object(); } +simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } 
+simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document::operator double() noexcept(false) { return get_double(); } +simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document::operator value() noexcept(false) { return get_value(); } + +#endif +simdjson_inline simdjson_result document::count_elements() & noexcept { + auto a = get_array(); + simdjson_result answer = a.count_elements(); + /* If there was an array, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::count_fields() & noexcept { + auto a = get_object(); + simdjson_result answer = a.count_fields(); + /* If there was an object, we are now left pointing at its first element. 
*/ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::at(size_t index) & noexcept { + auto a = get_array(); + return a.at(index); +} +simdjson_inline simdjson_result document::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result document::end() & noexcept { + return {}; +} + +simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field(const char *key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline error_code document::consume() noexcept { + auto error = iter.skip_child(0); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result document::raw_json() noexcept { + auto _iter = get_root_value_iterator(); + const uint8_t * starting_point{_iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. 
+ const uint8_t * final_point{iter.unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result document::type() noexcept { + return get_root_value_iterator().type(); +} + +simdjson_inline simdjson_result document::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline simdjson_result document::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + +simdjson_inline bool document::is_negative() noexcept { + return get_root_value_iterator().is_root_negative(); +} + +simdjson_inline simdjson_result document::is_integer() noexcept { + return get_root_value_iterator().is_root_integer(true); +} + +simdjson_inline simdjson_result document::get_number_type() noexcept { + return get_root_value_iterator().get_root_number_type(true); +} + +simdjson_inline simdjson_result document::get_number() noexcept { + return get_root_value_iterator().get_root_number(true); +} + + +simdjson_inline simdjson_result document::raw_json_token() noexcept { + auto _iter = get_root_value_iterator(); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); +} + +simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_pointer.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result 
document::at_path(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_path is called + if (json_path.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + haswell::ondemand::document &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base( + error + ) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return
first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return 
first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first); +} +template<> simdjson_inline error_code simdjson_result::get(haswell::ondemand::document &out) & noexcept = delete; +template<> simdjson_inline error_code 
simdjson_result::get(haswell::ondemand::document &out) && noexcept { + if (error()) { return error(); } + out = std::forward(first); + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} + +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} + +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} + +simdjson_inline bool simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} + +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} + +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} + +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} + + +#if SIMDJSON_EXCEPTIONS +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator haswell::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator haswell::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() 
noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator haswell::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator haswell::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline bool simdjson_result::at_end() const noexcept { + if (error()) { return error(); } + return first.at_end(); +} + + +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} + +} // namespace simdjson + + +namespace simdjson { +namespace haswell { +namespace ondemand { + +simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} +simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} +simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); 
}
+simdjson_inline simdjson_result<array> document_reference::get_array() & noexcept { return doc->get_array(); }
+simdjson_inline simdjson_result<object> document_reference::get_object() & noexcept { return doc->get_object(); }
+/**
+ * The document_reference instances are used primarily/solely for streams of JSON
+ * documents.
+ * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should
+ * give an error, so we check for trailing content.
+ *
+ * However, for streams of JSON documents, we want to be able to start from
+ *  "321" "321" "321"
+ * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string()
+ * successfully each time.
+ *
+ * To achieve this result, we pass a 'false' to a get_root_value_iterator() method:
+ * this indicates that we allow trailing content.
+ */
+simdjson_inline simdjson_result<uint64_t> document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); }
+simdjson_inline simdjson_result<uint64_t> document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); }
+simdjson_inline simdjson_result<int64_t> document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); }
+simdjson_inline simdjson_result<int64_t> document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); }
+simdjson_inline simdjson_result<double> document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); }
+// Delegates to the *_in_string root getter, consistent with get_uint64_in_string()
+// and get_int64_in_string() above (this previously forwarded to get_root_double()).
+simdjson_inline simdjson_result<double> document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double_in_string(false); }
+simdjson_inline simdjson_result<std::string_view> document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); }
+template <typename string_type>
+simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); }
+simdjson_inline simdjson_result<std::string_view> document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); }
+simdjson_inline simdjson_result<raw_json_string> document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); }
+simdjson_inline simdjson_result<bool> document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); }
+simdjson_inline simdjson_result<value> document_reference::get_value() noexcept { return doc->get_value(); }
+simdjson_inline simdjson_result<bool> document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); }
+// Explicit specializations of get(): each one forwards to the matching typed getter.
+template<> simdjson_inline simdjson_result<array> document_reference::get() & noexcept { return get_array(); }
+template<> simdjson_inline simdjson_result<object> document_reference::get() & noexcept { return get_object(); }
+template<> simdjson_inline simdjson_result<raw_json_string> document_reference::get() & noexcept { return get_raw_json_string(); }
+template<> simdjson_inline simdjson_result<std::string_view> document_reference::get() & noexcept { return get_string(false); }
+template<> simdjson_inline simdjson_result<double> document_reference::get() & noexcept { return get_double(); }
+template<> simdjson_inline simdjson_result<uint64_t> document_reference::get() & noexcept { return get_uint64(); }
+template<> simdjson_inline simdjson_result<int64_t> document_reference::get() & noexcept { return get_int64(); }
+template<> simdjson_inline simdjson_result<bool> document_reference::get() & noexcept { return get_bool(); }
+template<> simdjson_inline simdjson_result<value> document_reference::get() & noexcept { return get_value(); }
+#if SIMDJSON_EXCEPTIONS
+template <class T>
+simdjson_inline document_reference::operator T() noexcept(false) { return get<T>(); }
+simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); }
+simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } +simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } +simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } +#endif +simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } +simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } +simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } +simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } +simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } +simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline 
simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } +simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } +simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } +simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } +simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } +simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } +simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } +simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } +simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } +simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } +simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} +simdjson_inline document_reference::operator document&() const noexcept { return *doc; } + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + + + +namespace simdjson { +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::document_reference value, error_code error) + noexcept : 
implementation_simdjson_result_base(std::forward(value), error) {} + + +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return 
first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if 
(error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator haswell::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator haswell::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline 
simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator haswell::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator haswell::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H +/* end file simdjson/generic/ondemand/document-inl.h for haswell */ +/* including simdjson/generic/ondemand/document_stream-inl.h for haswell: #include "simdjson/generic/ondemand/document_stream-inl.h" */ +/* begin file simdjson/generic/ondemand/document_stream-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define 
SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace haswell { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. + std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); +} + +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); +} + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). 
+ if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); +} + + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } +} + +inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); +} + +#endif // SIMDJSON_THREADS_ENABLED + +simdjson_inline document_stream::document_stream( + ondemand::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size, + bool _allow_comma_separated +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? 
MINIMAL_BATCH_SIZE : _batch_size}, + allow_comma_separated{_allow_comma_separated}, + error{SUCCESS} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change + #endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif +} + +simdjson_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + allow_comma_separated{false}, + error{UNINITIALIZED} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) + #endif +{ +} + +simdjson_inline document_stream::~document_stream() noexcept +{ + #ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); + #endif +} + +inline size_t document_stream::size_in_bytes() const noexcept { + return len; +} + +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +} + +simdjson_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { +} + +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { +} + +simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { + //if(stream->error) { return stream->error; } + return simdjson_result(stream->doc, stream->error); +} + +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) 
+ // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. + // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->error() is guarded against error conditions + // (it will immediately return if stream->error casts to false). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; +} + +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; +} + +simdjson_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. 
+ return iterator(this, error == EMPTY); +} + +simdjson_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); +} + +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->allocate(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } + doc_index = batch_start; + doc = document(json_iterator(&buf[batch_start], parser)); + doc.iter._streaming = true; + + #ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread on next batch if needed + error = stage1_thread_parser.allocate(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } + #endif // SIMDJSON_THREADS_ENABLED +} + +inline void document_stream::next() noexcept { + // We always enter at once once in an error condition. + if (error) { return; } + next_document(); + if (error) { return; } + auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); + doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; + + // Check if at end of structural indexes (i.e. 
at end of batch) + if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { + error = EMPTY; + // Load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + #ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } + #else + error = run_stage1(*parser, batch_start); + #endif + /** + * Whenever we move to another window, we need to update all pointers to make + * it appear as if the input buffer started at the beginning of the window. + * + * Take this input: + * + * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] + * + * Say you process the following window... + * + * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' + * + * When you do so, the json_iterator has a pointer at the beginning of the memory region + * (pointing at the beginning of '{"z"...'. + * + * When you move to the window that starts at... + * + * '[7, 10, 9] [15, 11, 12, 13] ... + * + * then it is not sufficient to just run stage 1. You also need to re-anchor the + * json_iterator so that it believes we are starting at '[7, 10, 9]...'. + * + * Under the DOM front-end, this gets done automatically because the parser owns + * the pointer the data, and when you call stage1 and then stage2 on the same + * parser, then stage2 will run on the pointer acquired by stage1. + * + * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that + * we used. But json_iterator has no callback when stage1 is called on the parser. + * In fact, I think that the parser is unaware of json_iterator. + * + * + * So we need to re-anchor the json_iterator after each call to stage 1 so that + * all of the pointers are in sync. + */ + doc.iter = json_iterator(&buf[batch_start], parser); + doc.iter._streaming = true; + /** + * End of resync. 
+ */ + + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. + doc_index = batch_start; + } + } +} + +inline void document_stream::next_document() noexcept { + // Go to next place where depth=0 (document depth) + error = doc.iter.skip_child(0); + if (error) { return; } + // Always set depth=1 at the start of document + doc.iter._depth = 1; + // consume comma if comma separated is allowed + if (allow_comma_separated) { doc.iter.consume_character(','); } + // Resets the string buffer at the beginning, thus invalidating the strings. + doc.iter._string_buf_loc = parser->string_buf.get(); + doc.iter._root = doc.iter.position(); +} + +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; +} + +inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { + // This code only updates the structural index in the parser, it does not update any json_iterator + // instance. 
+  size_t remaining = len - _batch_start;
+  if (remaining <= batch_size) {
+    return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final);
+  } else {
+    return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial);
+  }
+}
+
+simdjson_inline size_t document_stream::iterator::current_index() const noexcept {
+  return stream->doc_index;
+}
+
+/**
+ * Returns a view over the raw text of the document the iterator currently points at.
+ *
+ * For a scalar root the view runs from the start of the document up to the next
+ * structural index, with trailing whitespace and NUL bytes trimmed. For an array or
+ * object root the structural characters are scanned until the depth returns to zero,
+ * and the view ends on that closing character.
+ */
+simdjson_inline std::string_view document_stream::iterator::source() const noexcept {
+  auto depth = stream->doc.iter.depth();
+  auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get();
+
+  // If at root, process the first token to determine if scalar value
+  if (stream->doc.iter.at_root()) {
+    switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) {
+      case '{': case '[':   // Depth=1 already at start of document
+        break;
+      case '}': case ']':
+        depth--;
+        break;
+      default:    // Scalar value document
+        // TODO: We could remove trailing whitespaces
+        // This returns a string spanning from start of value to the beginning of the next document (excluded)
+        {
+          auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index];
+          // normally the length would be next_index - current_index() - 1, except for the last document
+          size_t svlen = next_index - current_index();
+          const char *start = reinterpret_cast<const char*>(stream->buf) + current_index();
+          // std::isspace requires a value representable as unsigned char (or EOF);
+          // passing a negative plain char is undefined behaviour, hence the cast.
+          while(svlen > 1 && (std::isspace(static_cast<unsigned char>(start[svlen-1])) || start[svlen-1] == '\0')) {
+            svlen--;
+          }
+          return std::string_view(start, svlen);
+        }
+    }
+    cur_struct_index++;
+  }
+
+  while (cur_struct_index <= static_cast<int64_t>(stream->parser->implementation->n_structural_indexes)) {
+    switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) {
+      case '{': case '[':
+        depth++;
+        break;
+      case '}': case ']':
+        depth--;
+        break;
+    }
+    if (depth == 0) { break; }
+    cur_struct_index++;
+  }
+
+  return std::string_view(reinterpret_cast<const char*>(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);
+}
+
+inline error_code document_stream::iterator::error() const noexcept {
+  return stream->error;
+}
+
+#ifdef SIMDJSON_THREADS_ENABLED
+
+inline void document_stream::load_from_stage1_thread() noexcept {
+  worker->finish();
+  // Swap to the parser that was loaded up in the thread. Make sure the parser has
+  // enough memory to swap to, as well.
+  std::swap(stage1_thread_parser,*parser);
+  error = stage1_thread_error;
+  if (error) { return; }
+
+  // If there's anything left, start the stage 1 thread!
+  if (next_batch_start() < len) {
+    start_stage1_thread();
+  }
+}
+
+inline void document_stream::start_stage1_thread() noexcept {
+  // we call the thread on a lambda that will update
+  // this->stage1_thread_error
+  // there is only one thread that may write to this value
+  // TODO this is NOT exception-safe.
+ this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); + + worker->run(this, & this->stage1_thread_parser, _next_batch_start); +} + +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result::simdjson_result( + haswell::ondemand::document_stream &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} + +} + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H +/* end file simdjson/generic/ondemand/document_stream-inl.h for haswell */ +/* including simdjson/generic/ondemand/field-inl.h for haswell: #include "simdjson/generic/ondemand/field-inl.h" */ +/* begin file simdjson/generic/ondemand/field-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +// clang 6 does not think the default constructor can be noexcept, so we make it explicit +simdjson_inline field::field() noexcept : std::pair() {} + +simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept + : 
std::pair(key, std::forward(value)) +{ +} + +simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { + raw_json_string key; + SIMDJSON_TRY( parent_iter.field_key().get(key) ); + SIMDJSON_TRY( parent_iter.field_value() ); + return field::start(parent_iter, key); +} + +simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { + return field(key, parent_iter.child()); +} + +simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); + first.consume(); + return answer; +} + +simdjson_inline raw_json_string field::key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return first; +} + + +simdjson_inline std::string_view field::key_raw_json_token() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return std::string_view(reinterpret_cast(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1); +} + +simdjson_inline std::string_view field::escaped_key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. 
+ auto end_quote = second.iter._json_iter->token.peek(-1); + while(*end_quote != '"') end_quote--; + return std::string_view(reinterpret_cast(first.buf), end_quote - first.buf); +} + +simdjson_inline value &field::value() & noexcept { + return second; +} + +simdjson_inline value field::value() && noexcept { + return std::forward(*this).second; +} + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + haswell::ondemand::field &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::key() noexcept { + if (error()) { return error(); } + return first.key(); +} + +simdjson_inline simdjson_result simdjson_result::key_raw_json_token() noexcept { + if (error()) { return error(); } + return first.key_raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::escaped_key() noexcept { + if (error()) { return error(); } + return first.escaped_key(); +} + +simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(allow_replacement); +} + +simdjson_inline simdjson_result simdjson_result::value() noexcept { + if (error()) { return error(); } + return std::move(first.value()); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +/* end file simdjson/generic/ondemand/field-inl.h for haswell */ +/* including simdjson/generic/ondemand/json_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/json_iterator-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef 
SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept + : token(std::forward(other.token)), + parser{other.parser}, + _string_buf_loc{other._string_buf_loc}, + error{other.error}, + _depth{other._depth}, + _root{other._root}, + _streaming{other._streaming} +{ + other.parser = nullptr; +} +simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { + token = other.token; + parser = other.parser; + _string_buf_loc = other._string_buf_loc; + error = other.error; + _depth = other._depth; + _root = other._root; + _streaming = other._streaming; + other.parser = nullptr; + return *this; +} + +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + 
_streaming{false} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} + +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{streaming} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + +inline void json_iterator::rewind() noexcept { + token.set_position( root_position() ); + logger::log_headers(); // We start again + _string_buf_loc = parser->string_buf.get(); + _depth = 1; +} + +inline bool json_iterator::balanced() const noexcept { + token_iterator ti(token); + int32_t count{0}; + ti.set_position( root_position() ); + while(ti.peek() <= peek_last()) { + switch (*ti.return_current_and_advance()) + { + case '[': case '{': + count++; + break; + case ']': case '}': + count--; + break; + default: + break; + } + } + return count == 0; +} + + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and parent_depth, which is a desired effect. The warning does not show up if the +// skip_child() function is not marked inline.
+SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { + if (depth() <= parent_depth) { return SUCCESS; } + switch (*return_current_and_advance()) { + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + + // For the first open array/object in a value, we've already incremented depth, so keep it the same + // We never stop at colon, but if we did, it wouldn't affect depth + case '[': case '{': case ':': + logger::log_start_value(*this, "skip"); + break; + // If there is a comma, we have just finished a value in an array/object, and need to get back in + case ',': + logger::log_value(*this, "skip"); + break; + // ] or } means we just finished a value and need to jump out of the array/object + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } +#if SIMDJSON_CHECK_EOF + // If there are no more tokens, the parent is incomplete. + if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + break; + case '"': + if(*peek() == ':') { + // We are at a key!!! + // This might happen if you just started an object and you skip it immediately. + // Performance note: it would be nice to get rid of this check as it is somewhat + // expensive. + // https://github.com/simdjson/simdjson/issues/1742 + logger::log_value(*this, "key"); + return_current_and_advance(); // eat up the ':' + break; // important!!! 
+ } + simdjson_fallthrough; + // Anything else must be a scalar value + default: + // For the first scalar, we will have incremented depth already, so we decrement it here. + logger::log_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + } + + // Now that we've considered the first value, we only increment/decrement for arrays/objects + while (position() < end_position()) { + switch (*return_current_and_advance()) { + case '[': case '{': + logger::log_start_value(*this, "skip"); + _depth++; + break; + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + default: + logger::log_value(*this, "skip", ""); + break; + } + } + + return report_error(TAPE_ERROR, "not enough close braces"); +} + +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool json_iterator::at_root() const noexcept { + return position() == root_position(); +} + +simdjson_inline bool json_iterator::is_single_token() const noexcept { + return parser->implementation->n_structural_indexes == 1; +} + +simdjson_inline bool json_iterator::streaming() const noexcept { + return _streaming; +} + +simdjson_inline token_position json_iterator::root_position() const noexcept { + return _root; +} + +simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +} + +simdjson_inline void json_iterator::assert_at_root() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + // Under Visual Studio, the next 
SIMDJSON_ASSUME fails with: the argument + // has side effects that will be discarded. + SIMDJSON_ASSUME( token.position() == _root ); +#endif +} + +simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { + assert_valid_position(token._position + required_tokens - 1); +} + +simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); + SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#endif +} + +simdjson_inline bool json_iterator::at_end() const noexcept { + return position() == end_position(); +} +simdjson_inline token_position json_iterator::end_position() const noexcept { + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + return &parser->implementation->structural_indexes[n_structural_indexes]; +} + +inline std::string json_iterator::to_string() const noexcept { + if( !is_alive() ) { return "dead json_iterator instance"; } + const char * current_structural = reinterpret_cast(token.peek()); + return std::string("json_iterator [ depth : ") + std::to_string(_depth) + + std::string(", structural : '") + std::string(current_structural,1) + + std::string("', offset : ") + std::to_string(token.current_offset()) + + std::string("', error : ") + error_message(error) + + std::string(" ]"); +} + +inline simdjson_result json_iterator::current_location() const noexcept { + if (!is_alive()) { // Unrecoverable error + if (!at_root()) { + return reinterpret_cast(token.peek(-1)); + } else { + return reinterpret_cast(token.peek()); + } + } + if (at_end()) { + return OUT_OF_BOUNDS; + } + return reinterpret_cast(token.peek()); +} + +simdjson_inline bool json_iterator::is_alive() const noexcept { + return parser; +} + +simdjson_inline void json_iterator::abandon() noexcept { + parser 
= nullptr; + _depth = 0; +} + +simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif // SIMDJSON_CHECK_EOF + return token.return_current_and_advance(); +} + +simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { + // deliberately done without safety guard: + return token.peek(); +} + +simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // SIMDJSON_CHECK_EOF + return token.peek(delta); +} + +simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // #if SIMDJSON_CHECK_EOF + return token.peek_length(delta); +} + +simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { + // todo: currently we require end-of-string buffering, but the following + // assert_valid_position should be turned on if/when we lift that condition. + // assert_valid_position(position); + // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. + return token.peek(position); +} + +simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_length(position); +} +simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_root_length(position); +} + +simdjson_inline token_position json_iterator::last_position() const noexcept { + // The following line fails under some compilers... 
+ // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); + // since it has side-effects. + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + SIMDJSON_ASSUME(n_structural_indexes > 0); + return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +} +simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { + return token.peek(last_position()); +} + +simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { + SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); + SIMDJSON_ASSUME(_depth == parent_depth + 1); + _depth = parent_depth; +} + +simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); + _depth = child_depth; +} + +simdjson_inline depth_t json_iterator::depth() const noexcept { + return _depth; +} + +simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { + return _string_buf_loc; +} + +simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); + logger::log_error(*this, message); + error = _error; + return error; +} + +simdjson_inline token_position json_iterator::position() const noexcept { + return token.position(); +} + +simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { + return parser->unescape(in, _string_buf_loc, allow_replacement); +} + +simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { + return parser->unescape_wobbly(in, _string_buf_loc); +} + +simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + 
SIMDJSON_ASSUME(_depth == child_depth - 1); +#if SIMDJSON_DEVELOPMENT_CHECKS +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); + SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); +#endif +#endif + token.set_position(position); + _depth = child_depth; +} + +simdjson_inline error_code json_iterator::consume_character(char c) noexcept { + if (*peek() == c) { + return_current_and_advance(); + return SUCCESS; + } + return TAPE_ERROR; +} + +#if SIMDJSON_DEVELOPMENT_CHECKS + +simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; +} + +simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } +} + +#endif + + +simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); + logger::log_error(*this, message); + return _error; +} + + +simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { + // This function is not expected to be called in performance-sensitive settings. + // Let us guard against silly cases: + if((N < max_len) || (N == 0)) { return false; } + // Copy to the buffer. + std::memcpy(tmpbuf, json, max_len); + if(N > max_len) { // We pad whatever remains with ' '. 
+ std::memset(tmpbuf + max_len, ' ', N - max_len); + } + return true; +} + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/json_iterator-inl.h for haswell */ +/* including simdjson/generic/ondemand/json_type-inl.h for haswell: #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* begin file simdjson/generic/ondemand/json_type-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { + switch (type) { + case json_type::array: out << "array"; break; + case json_type::object: out << "object"; break; + case json_type::number: out << "number"; break; + case json_type::string: out << "string"; break; + case json_type::boolean: out << "boolean"; break; + case json_type::null: out << "null"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& 
operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { + return out << type.value(); +} +#endif + + + +simdjson_inline number_type number::get_number_type() const noexcept { + return type; +} + +simdjson_inline bool number::is_uint64() const noexcept { + return get_number_type() == number_type::unsigned_integer; +} + +simdjson_inline uint64_t number::get_uint64() const noexcept { + return payload.unsigned_integer; +} + +simdjson_inline number::operator uint64_t() const noexcept { + return get_uint64(); +} + +simdjson_inline bool number::is_int64() const noexcept { + return get_number_type() == number_type::signed_integer; +} + +simdjson_inline int64_t number::get_int64() const noexcept { + return payload.signed_integer; +} + +simdjson_inline number::operator int64_t() const noexcept { + return get_int64(); +} + +simdjson_inline bool number::is_double() const noexcept { + return get_number_type() == number_type::floating_point_number; +} + +simdjson_inline double number::get_double() const noexcept { + return payload.floating_point_number; +} + +simdjson_inline number::operator double() const noexcept { + return get_double(); +} + +simdjson_inline double number::as_double() const noexcept { + if(is_double()) { + return payload.floating_point_number; + } + if(is_int64()) { + return double(payload.signed_integer); + } + return double(payload.unsigned_integer); +} + +simdjson_inline void number::append_s64(int64_t value) noexcept { + payload.signed_integer = value; + type = number_type::signed_integer; +} + +simdjson_inline void number::append_u64(uint64_t value) noexcept { + payload.unsigned_integer = value; + type = number_type::unsigned_integer; +} + +simdjson_inline void number::append_double(double value) noexcept { + payload.floating_point_number = value; + type = number_type::floating_point_number; +} + +simdjson_inline void number::skip_double() noexcept { + type = number_type::floating_point_number; +} + +} // namespace ondemand +} // namespace 
haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +/* end file simdjson/generic/ondemand/json_type-inl.h for haswell */ +/* including simdjson/generic/ondemand/logger-inl.h for haswell: #include "simdjson/generic/ondemand/logger-inl.h" */ +/* begin file simdjson/generic/ondemand/logger-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace haswell { +namespace ondemand { +namespace logger { + +static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +static constexpr const int LOG_EVENT_LEN = 20; +static constexpr const int LOG_BUFFER_LEN = 30; +static constexpr const int LOG_SMALL_BUFFER_LEN = 10; +static int log_depth = 0; // Not threadsafe. Log only. 
+ +// Helper to turn unprintable or newline characters into spaces +static inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } +} + +template +static inline std::string string_format(const std::string& format, const Args&... args) +{ + SIMDJSON_PUSH_DISABLE_ALL_WARNINGS + int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; + auto size = static_cast(size_s); + if (size <= 0) return std::string(); + std::unique_ptr buf(new char[size]); + std::snprintf(buf.get(), size, format.c_str(), args...); + SIMDJSON_POP_DISABLE_WARNINGS + return std::string(buf.get(), buf.get() + size - 1); +} + +static inline log_level get_log_level_from_env() +{ + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + char *lvl = getenv("SIMDJSON_LOG_LEVEL"); + SIMDJSON_POP_DISABLE_WARNINGS + if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } + return log_level::info; +} + +static inline log_level log_threshold() +{ + static log_level threshold = get_log_level_from_env(); + return threshold; +} + +static inline bool should_log(log_level level) +{ + return level >= log_threshold(); +} + +inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +} + +inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "", type, detail, log_level::info); +} +inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +} + +inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, 
std::string_view detail) noexcept { + log_line(iter, index, depth, "+", type, detail, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} +inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_line(iter, "+", type, "", delta, depth_delta, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} + +inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + if (LOG_ENABLED) { log_depth--; } + log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +} + +inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); +} +inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { + log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); +} + +inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_event(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_value(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_start_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_end_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_error(iter.json_iter(), error, detail, delta, depth_delta); +} + +inline void 
log_headers() noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(log_level::info))) { + // Technically a static variable is not thread-safe, but if you are using threads and logging... well... + static bool displayed_hint{false}; + log_depth = 0; + printf("\n"); + if (!displayed_hint) { + // We only print this helpful header once. + printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); + printf("# +array says 'this is where we were when we discovered the start array'\n"); + printf( + "# -array says 'this is where we were when we ended the array'\n"); + printf("# skip says 'this is a structural or value I am skipping'\n"); + printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); + printf("#\n"); + printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); + printf("# in addition to the depth being displayed.\n"); + printf("#\n"); + printf("# Every token in the document has a single depth determined by the tokens before it,\n"); + printf("# and is not affected by what the token actually is.\n"); + printf("#\n"); + printf("# Not all structural elements are presented as tokens in the logs.\n"); + printf("#\n"); + printf("# We never give control to the user within an empty array or an empty object.\n"); + printf("#\n"); + printf("# Inside an array, having a depth greater than the array's depth means that\n"); + printf("# we are pointing inside a value.\n"); + printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); + printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); + displayed_hint = true; + } + printf("\n"); + printf("| %-*s ", LOG_EVENT_LEN, "Event"); + printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); + printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); + // printf("| %-*s ", 5, "Next#"); + printf("| %-*s ", 5, "Depth"); + printf("| Detail "); + printf("|\n"); + + printf("|%.*s", 
LOG_EVENT_LEN + 2, DASHES); + printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); + printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); + // printf("|%.*s", 5+2, DASHES); + printf("|%.*s", 5 + 2, DASHES); + printf("|--------"); + printf("|\n"); + fflush(stdout); + } + } +} + +template +inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { + log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); +} + +template +inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(level))) { + const int indent = depth * 2; + const auto buf = iter.token.buf; + auto msg = string_format(title, std::forward(args)...); + printf("| %*s%s%-*s ", indent, "", title_prefix, + LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); + { + // Print the current structural. + printf("| "); + // Before we begin, the index might point right before the document. + // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 + if (index < iter._root) { + printf("%*s", LOG_BUFFER_LEN, ""); + } else { + auto current_structural = &buf[*index]; + for (int i = 0; i < LOG_BUFFER_LEN; i++) { + printf("%c", printable_char(current_structural[i])); + } + } + printf(" "); + } + { + // Print the next structural. 
+ printf("| "); + auto next_structural = &buf[*(index + 1)]; + for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { + printf("%c", printable_char(next_structural[i])); + } + printf(" "); + } + // printf("| %5u ", *(index+1)); + printf("| %5i ", depth); + printf("| %6.*s ", int(detail.size()), detail.data()); + printf("|\n"); + fflush(stdout); + } + } +} + +} // namespace logger +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +/* end file simdjson/generic/ondemand/logger-inl.h for haswell */ +/* including simdjson/generic/ondemand/object-inl.h for haswell: #include "simdjson/generic/ondemand/object-inl.h" */ +/* begin file simdjson/generic/ondemand/object-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + 
logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { + return find_field_unordered(key); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { + return std::forward(*this).find_field_unordered(key); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} + +simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_object().error() ); + return object(iter); +} +simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_root_object().error() ); 
+ return object(iter); +} +simdjson_inline error_code object::consume() noexcept { + if(iter.is_at_key()) { + /** + * whenever you are pointing at a key, calling skip_child() is + * unsafe because you will hit a string and you will assume that + * it is string value, and this mistake will lead you to make bad + * depth computation. + */ + /** + * We want to 'consume' the key. We could really + * just do _json_iter->return_current_and_advance(); at this + * point, but, for clarity, we will use the high-level API to + * eat the key. We assume that the compiler optimizes away + * most of the work. + */ + simdjson_unused raw_json_string actual_key; + auto error = iter.field_key().get(actual_key); + if (error) { iter.abandon(); return error; }; + // Let us move to the value while we are at it. + if ((error = iter.field_value())) { iter.abandon(); return error; } + } + auto error_skip = iter.json_iter().skip_child(iter.depth()-1); + if(error_skip) { iter.abandon(); } + return error_skip; +} + +simdjson_inline simdjson_result object::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.started_object().error() ); + return object(iter); +} + +simdjson_inline object object::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline object::object(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result object::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return object_iterator(iter); +} +simdjson_inline simdjson_result object::end() noexcept { + return object_iterator(iter); +} + +inline 
simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = find_field(unescaped); // Take note find_field does not unescape keys when matching + } else { + child = find_field(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; +} + +inline simdjson_result object::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { + return INVALID_JSON_POINTER; + } + return at_pointer(json_pointer); +} + +simdjson_inline simdjson_result object::count_fields() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. 
+ if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the object after counting the number of elements. + iter.reset_object(); + return count; +} + +simdjson_inline simdjson_result object::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_object().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +simdjson_inline simdjson_result object::reset() & noexcept { + return iter.reset_object(); +} + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first)[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return 
error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +inline simdjson_result simdjson_result::reset() noexcept { + if (error()) { return error(); } + return first.reset(); +} + +inline simdjson_result simdjson_result::is_empty() noexcept { + if (error()) { return error(); } + return first.is_empty(); +} + +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +/* end file simdjson/generic/ondemand/object-inl.h for haswell */ +/* including simdjson/generic/ondemand/object_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/object_iterator-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/ondemand/field-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +// +// object_iterator +// + +simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result object_iterator::operator*() noexcept { + error_code error = iter.error(); + if (error) { iter.abandon(); return error; } + auto result = field::start(iter); + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (result.error()) { iter.abandon(); } + return result; +} +simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { + return iter.is_open(); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline object_iterator &object_iterator::operator++() noexcept { + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error + + simdjson_unused error_code error; + if ((error = iter.skip_child() )) { return *this; } + + simdjson_unused bool has_value; + if ((error = iter.has_next_field().get(has_value) )) { return *this; }; + return *this; +} +SIMDJSON_POP_DISABLE_WARNINGS + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter.depth. at_start may vary. 
Error is +// always SUCCESS: +// +// - Start: This is the state when the object is first found and the iterator is just past the {. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the , or } before the next value. In this state, +// depth == iter.depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter.depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the object iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an +// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter.depth == depth, and at_start == false. +// +// Errors that occur while reading a field to give to the user (such as when the key is not a +// string or the field is missing a colon) are yielded immediately. Depth is then decremented, +// moving to the Finished state without transitioning through an Error state at all. +// +// ## Terminal State +// +// The terminal state has iter.depth < depth. at_start is always false. 
+// +// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. +// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. +// + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + haswell::ondemand::object_iterator &&value +) noexcept + : implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +// If we're iterating and there is an error, return the error once. 
+simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +// Checks for ']' and ',' +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++first; + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/object_iterator-inl.h for haswell */ +/* including simdjson/generic/ondemand/parser-inl.h for haswell: #include "simdjson/generic/ondemand/parser-inl.h" */ +/* begin file simdjson/generic/ondemand/parser-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +simdjson_inline 
parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity} { +} + +simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + if (new_capacity > max_capacity()) { return CAPACITY; } + if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + + // string_capacity copied from document::allocate + _capacity = 0; + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); +#if SIMDJSON_DEVELOPMENT_CHECKS + start_positions.reset(new (std::nothrow) token_position[new_max_depth]); +#endif + if (implementation) { + SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); + SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + } else { + SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); + } + _capacity = new_capacity; + _max_depth = new_max_depth; + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. 
+ SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return document::start({ reinterpret_cast(json.data()), this }); +} + +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; + } + return document::start({ reinterpret_cast(json.data()), this, true }); +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { + return iterate(padded_string_view(json, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { + if(json.capacity() - json.size() < SIMDJSON_PADDING) { + json.reserve(json.size() + SIMDJSON_PADDING); + } + return iterate(padded_string_view(json)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { + return iterate(padded_string_view(json)); +} + +simdjson_warn_unused simdjson_inline simdjson_result 
parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + padded_string_view json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + const padded_string &json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length()) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return json_iterator(reinterpret_cast(json.data()), this); +} + +inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { + buf += 3; + len -= 3; + } + if(allow_comma_separated && batch_size < len) { batch_size = len; } + return document_stream(*this, buf, len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return 
iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} + +simdjson_inline size_t parser::capacity() const noexcept { + return _capacity; +} +simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_inline size_t parser::max_depth() const noexcept { + return _max_depth; +} + +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; + } +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { + uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { + uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H +/* end file simdjson/generic/ondemand/parser-inl.h for haswell */ 
+/* including simdjson/generic/ondemand/raw_json_string-inl.h for haswell: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +namespace haswell { +namespace ondemand { + +simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} + +simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } + + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;pos < target.size() && target[pos] != '\\';pos++) {} + // slow path may begin. + bool escaping{false}; + for(;pos < target.size();pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;target[pos] && target[pos] != '\\';pos++) {} + // slow path may begin. 
+ bool escaping{false}; + for(;target[pos];pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { + // If we are going to call memcmp, then we must know something about the length of the raw_json_string. + return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); +} + +simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + if(target.size() <= SIMDJSON_PADDING) { + return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + } + const char * r{raw()}; + size_t pos{0}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. 
+ return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { + // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. 
+ return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { + return a.unsafe_is_equal(c); +} + +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { + return a == c; +} + +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { + return !(a == c); +} + +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { + return !(a == c); +} + + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { + return iter.unescape(*this, allow_replacement); +} + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { + return iter.unescape_wobbly(*this); +} + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { + bool in_escape = false; + const char *s = str.raw(); + while (true) { + switch (*s) { + case '\\': in_escape = !in_escape; break; + case '"': if (in_escape) { in_escape = false; } else { return out; } break; + default: if (in_escape) { in_escape = false; } + } + out << *s; + s++; + } +} + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { + if (error()) { return error(); } + return 
first.raw(); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(haswell::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { + if (error()) { return error(); } + return first.unescape(iter, allow_replacement); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(haswell::ondemand::json_iterator &iter) const noexcept { + if (error()) { return error(); } + return first.unescape_wobbly(iter); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H +/* end file simdjson/generic/ondemand/raw_json_string-inl.h for haswell */ +/* including simdjson/generic/ondemand/serialization-inl.h for haswell: #include "simdjson/generic/ondemand/serialization-inl.h" */ +/* begin file simdjson/generic/ondemand/serialization-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +inline std::string_view trim(const std::string_view str) noexcept { + // We can almost surely do better by rolling our own find_first_not_of function. 
+ size_t first = str.find_first_not_of(" \t\n\r"); + // If we have the empty string (just white space), then no trimming is possible, and + // we return the empty string_view. + if (std::string_view::npos == first) { return std::string_view(); } + size_t last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, (last - first + 1)); +} + + +inline simdjson_result to_json_string(haswell::ondemand::document& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(haswell::ondemand::document_reference& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(haswell::ondemand::value& x) noexcept { + /** + * If we somehow receive a value that has already been consumed, + * then the following code could be in trouble. E.g., we create + * an array as needed, but if an array was already created, then + * it could be bad. 
+ */ + using namespace haswell::ondemand; + haswell::ondemand::json_type t; + auto error = x.type().get(t); + if(error != SUCCESS) { return error; } + switch (t) + { + case json_type::array: + { + haswell::ondemand::array array; + error = x.get_array().get(array); + if(error) { return error; } + return to_json_string(array); + } + case json_type::object: + { + haswell::ondemand::object object; + error = x.get_object().get(object); + if(error) { return error; } + return to_json_string(object); + } + default: + return trim(x.raw_json_token()); + } +} + +inline simdjson_result to_json_string(haswell::ondemand::object& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(haswell::ondemand::array& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} +} // namespace simdjson + +namespace simdjson { namespace haswell { namespace ondemand { + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out 
<< v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::document_reference& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, 
simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif +}}} // namespace simdjson::haswell::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +/* end file simdjson/generic/ondemand/serialization-inl.h for haswell */ +/* including simdjson/generic/ondemand/token_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/token_iterator-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define 
SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +/* Inline implementation of token_iterator. The constructor stores the buffer pointer and the starting token position. */ simdjson_inline token_iterator::token_iterator( + const uint8_t *_buf, + token_position position +) noexcept : buf{_buf}, _position{position} +{ +} + +/* Reads the entry at the current position without advancing. */ simdjson_inline uint32_t token_iterator::current_offset() const noexcept { + return *(_position); +} + + +/* Returns the address of buf[entry at the current position], then post-increments the position. */ simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { + return &buf[*(_position++)]; +} + +/* Position-based peeks: they read through the given position and leave the iterator unchanged (const members). */ simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; +} +simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; +} +/* Difference between the entry at position+1 and the entry at position. NOTE(review): presumably these entries are byte offsets of consecutive tokens, confirm against the token_position definition. */ simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { + return *(position+1) - *position; +} + +/* Returns the smaller of (entry at position+2 minus entry at position) and (entry at position+1 minus entry at position). */ simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { + return *(position+2) - *(position) > *(position+1) - *(position) ?
+ *(position+1) - *(position) + : *(position+2) - *(position); +} +/* Delta-based peeks: the same reads as the position-based overloads, taken at _position+delta. */ simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(_position+delta)]; +} +simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(_position+delta); +} +simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + return *(_position+delta+1) - *(_position+delta); +} + +/* Accessor and mutator for the current token position. */ simdjson_inline token_position token_iterator::position() const noexcept { + return _position; +} +simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; +} + +/* All six comparison operators compare only the _position members; buf is not consulted. */ simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return _position == other._position; +} +simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return _position != other._position; +} +simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return _position > other._position; +} +simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return _position >= other._position; +} +simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return _position < other._position; +} +simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return _position <= other._position; +} + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +/* Result-wrapper constructors: forward either a token_iterator value or an error_code to implementation_simdjson_result_base. */ simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H +/* end file
simdjson/generic/ondemand/token_iterator-inl.h for haswell */ +/* including simdjson/generic/ondemand/value-inl.h for haswell: #include "simdjson/generic/ondemand/value-inl.h" */ +/* begin file simdjson/generic/ondemand/value-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +/* Inline implementation of ondemand value, a thin layer over the wrapped value_iterator (iter). The constructor copies the given iterator. */ simdjson_inline value::value(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} +/* start() and resume() have identical bodies: both return the iterator, which is converted to a value through the constructor above. */ simdjson_inline value value::start(const value_iterator &iter) noexcept { + return iter; +} +simdjson_inline value value::resume(const value_iterator &iter) noexcept { + return iter; +} + +/* Container accessors delegate to array::start and object::start on the wrapped iterator. */ simdjson_inline simdjson_result value::get_array() noexcept { + return array::start(iter); +} +simdjson_inline simdjson_result
value::get_object() noexcept { + return object::start(iter); +} +/* Starts the object while the iterator reports at_start(); otherwise resumes it through object::resume. */ simdjson_inline simdjson_result value::start_or_resume_object() noexcept { + if (iter.at_start()) { + return get_object(); + } else { + return object::resume(iter); + } +} + +/* Scalar getters: each one forwards to the value_iterator member of the same name. */ simdjson_inline simdjson_result value::get_raw_json_string() noexcept { + return iter.get_raw_json_string(); +} +simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { + return iter.get_string(allow_replacement); +} +template +simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { + return iter.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result value::get_wobbly_string() noexcept { + return iter.get_wobbly_string(); +} +simdjson_inline simdjson_result value::get_double() noexcept { + return iter.get_double(); +} +simdjson_inline simdjson_result value::get_double_in_string() noexcept { + return iter.get_double_in_string(); +} +simdjson_inline simdjson_result value::get_uint64() noexcept { + return iter.get_uint64(); +} +simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { + return iter.get_uint64_in_string(); +} +simdjson_inline simdjson_result value::get_int64() noexcept { + return iter.get_int64(); +} +simdjson_inline simdjson_result value::get_int64_in_string() noexcept { + return iter.get_int64_in_string(); +} +simdjson_inline simdjson_result value::get_bool() noexcept { + return iter.get_bool(); +} +simdjson_inline simdjson_result value::is_null() noexcept { + return iter.is_null(); +} +/* Explicit specializations of get(): each forwards to the matching typed getter; the string one calls get_string(false). */ template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result
value::get() noexcept { return get_number(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } + +/* Out-parameter form: calls get() and then get(out) on its result, returning the error code. */ template simdjson_inline error_code value::get(T &out) noexcept { + return get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +/* Conversion operators, compiled only when SIMDJSON_EXCEPTIONS is set. They are declared noexcept(false) and each returns the result of the matching getter. */ template +simdjson_inline value::operator T() noexcept(false) { + return get(); +} +simdjson_inline value::operator array() noexcept(false) { + return get_array(); +} +simdjson_inline value::operator object() noexcept(false) { + return get_object(); +} +simdjson_inline value::operator uint64_t() noexcept(false) { + return get_uint64(); +} +simdjson_inline value::operator int64_t() noexcept(false) { + return get_int64(); +} +simdjson_inline value::operator double() noexcept(false) { + return get_double(); +} +simdjson_inline value::operator std::string_view() noexcept(false) { + return get_string(false); +} +simdjson_inline value::operator raw_json_string() noexcept(false) { + return get_raw_json_string(); +} +simdjson_inline value::operator bool() noexcept(false) { + return get_bool(); +} +#endif + +/* begin() starts the value as an array and returns its begin(); end() returns a default-constructed result. */ simdjson_inline simdjson_result value::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result value::end() & noexcept { + return {}; +} +/* Counts the elements of the array, then calls iter.move_at_start() so the value can be started again (rationale in the body comment). */ simdjson_inline simdjson_result value::count_elements() & noexcept { + simdjson_result answer; + auto a = get_array(); + answer = a.count_elements(); + // count_elements leaves you pointing inside the array, at the first element. + // We need to move back so that the user can create a new array (which requires that + // we point at '[').
+ iter.move_at_start(); + return answer; +} +/* Object counterpart of the function above: counts the fields through get_object(), then calls iter.move_at_start(). */ simdjson_inline simdjson_result value::count_fields() & noexcept { + simdjson_result answer; + auto a = get_object(); + answer = a.count_fields(); + iter.move_at_start(); + return answer; +} +/* Starts the value as an array and returns the element at index. */ simdjson_inline simdjson_result value::at(size_t index) noexcept { + auto a = get_array(); + return a.at(index); +} + +/* Field lookups (find_field, find_field_unordered, operator[]) all go through start_or_resume_object(). */ simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result value::find_field(const char *key) noexcept { + return start_or_resume_object().find_field(key); +} + +simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} + +simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result value::operator[](const char *key) noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline simdjson_result value::type() noexcept { + return iter.type(); +} + +/* True when the type is neither array nor object; an error from type() is returned unchanged. */ simdjson_inline simdjson_result value::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return !
((this_type == json_type::array) || (this_type == json_type::object)); +} + +/* True when type() reports json_type::string; an error from type() is returned unchanged. */ simdjson_inline simdjson_result value::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + + +/* Number classification helpers forward to the value_iterator members of the same name. */ simdjson_inline bool value::is_negative() noexcept { + return iter.is_negative(); +} + +simdjson_inline simdjson_result value::is_integer() noexcept { + return iter.is_integer(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { + return iter.get_number_type(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { + return iter.get_number(); +} + +/* Returns a view of iter.peek_start_length() bytes beginning at iter.peek_start(); nothing is trimmed here. */ simdjson_inline std::string_view value::raw_json_token() noexcept { + return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); +} + +/* For arrays and objects returns the raw_json() of the container; for every other type returns raw_json_token(). */ simdjson_inline simdjson_result value::raw_json() noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: { + ondemand::array array; + SIMDJSON_TRY(get_array().get(array)); + return array.raw_json(); + } + case json_type::object: { + ondemand::object object; + SIMDJSON_TRY(get_object().get(object)); + return object.raw_json(); + } + default: + return raw_json_token(); + } +} + +/* Location and depth are read from the json_iterator behind the wrapped iterator. */ simdjson_inline simdjson_result value::current_location() noexcept { + return iter.json_iter().current_location(); +} + +simdjson_inline int32_t value::current_depth() const noexcept{ + return iter.json_iter().depth(); +} + +/* Shallow sanity check of a JSON pointer: it must be non-empty and begin with a slash. If a tilde is present, only the FIRST one is inspected: it must not be the last character and must be followed by 0 or 1. Later tildes are not examined. */ inline bool is_pointer_well_formed(std::string_view json_pointer) noexcept { + if (simdjson_unlikely(json_pointer.empty())) { // can't be + return false; + } + if (simdjson_unlikely(json_pointer[0] != '/')) { + return false; + } + size_t escape = json_pointer.find('~'); + if (escape == std::string_view::npos) { + return true; + } + if (escape == json_pointer.size() - 1) { + return false; + } + if (json_pointer[escape + 1] != '0' && json_pointer[escape + 1] != '1') { +
return false; + } + return true; +} + +/* Arrays and objects delegate to their own at_pointer(). For any other type the result is NO_SUCH_FIELD when is_pointer_well_formed() accepts the pointer and INVALID_JSON_POINTER otherwise. */ simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + // a non-empty string can be invalid, or accessing a primitive (issue 2154) + if (is_pointer_well_formed(json_pointer)) { + return NO_SUCH_FIELD; + } + return INVALID_JSON_POINTER; + } +} + +/* Arrays and objects delegate to their own at_path(); any other type yields INVALID_JSON_POINTER. */ simdjson_inline simdjson_result value::at_path(std::string_view json_path) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +/* Result wrapper for value. The two constructors forward a value or an error_code to implementation_simdjson_result_base. Most member functions that follow return the stored error if there is one and otherwise forward to the wrapped value (first). */ simdjson_inline simdjson_result::simdjson_result( + haswell::ondemand::value &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +/* Unlike its neighbours, end() does not consult first: without an error it returns a default-constructed result. */ simdjson_inline simdjson_result simdjson_result::end() & noexcept { + if (error()) { return error(); } + return
{}; +} + +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} + +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { + if (error()) { return error(); } + return first[key]; +} + +simdjson_inline simdjson_result simdjson_result::get_array() noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return
first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +/* Generic get(): error check first, then forward to the get() of the wrapped value. */ template simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return first.get(); +} +template simdjson_inline error_code simdjson_result::get(T &out) noexcept { + if (error()) { return error(); } + return first.get(out); +} + +/* Specializations for retrieving the value itself: get() moves first out, get(out) copy-assigns first into out and returns SUCCESS. */ template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return std::move(first); +} +template<> simdjson_inline error_code simdjson_result::get(haswell::ondemand::value &out) noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return
error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +/* Throwing conversion operators, compiled only when SIMDJSON_EXCEPTIONS is set: each throws simdjson_error when an error is stored and otherwise converts first to the target type. */ template +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return static_cast(first); +} +simdjson_inline simdjson_result::operator haswell::ondemand::array() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator haswell::ondemand::object() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator haswell::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +}
+/* Throwing conversion to bool: throws simdjson_error when an error is stored, otherwise converts the wrapped value (first). */ simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +/* The accessors below share one shape: return the stored error if there is one, otherwise forward to the member of the same name on the wrapped value (first). */ simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +/* JSON pointer lookup: a stored error is returned before first.at_pointer is ever called. */ simdjson_inline simdjson_result simdjson_result::at_pointer( + std::string_view json_pointer) noexcept { + if (error()) { + return error(); + } + return first.at_pointer(json_pointer); +} + +/* JSON path lookup: a stored error is returned before first.at_path is ever called. */ simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H +/* end file simdjson/generic/ondemand/value-inl.h for haswell */ +/* including simdjson/generic/ondemand/value_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/value_iterator-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped
(editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +simdjson_inline value_iterator::value_iterator( + json_iterator *json_iter, + depth_t depth, + token_position start_position +) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} +{ +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_root_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { + assert_at_container_start(); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + if (*_json_iter->peek() == '}') { + logger::log_value(*_json_iter, "empty object"); + _json_iter->return_current_and_advance(); + end_container(); + return false; + } + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! 
_json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should + // call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != '}') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); + } + // If the last character is } *and* the first gibberish character is also '}' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed object. 
+ return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { + auto error = check_root_object(); + if(error) { return error; } + return started_object(); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { +#if SIMDJSON_CHECK_EOF + if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } + // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + _json_iter->ascend_to(depth()-1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { + assert_at_next(); + + // It's illegal to call this unless there are more tokens: anything that ends in } or ] is + // obligated to verify there are more tokens if they are not the top level. + switch (*_json_iter->return_current_and_advance()) { + case '}': + logger::log_end_value(*_json_iter, "object"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between object fields"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { + error_code error; + bool has_value; + // + // Initially, the object can be in one of a few different places: + // + // 1. The start of the object, at the first field: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + if (at_first_field()) { + has_value = true; + + // + // 2. 
When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + return false; + + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + if ((error = skip_child() )) { abandon(); return error; } + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + while (has_value) { + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. 
+ if ((error = field_value() )) { abandon(); return error; } + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + //if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); // Skip the value entirely + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + + // If the loop ended, we're out of fields to look at. 
+ return false; +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { + /** + * When find_field_unordered_raw is called, we can either be pointing at the + * first key, pointing outside (at the closing brace) or if a key was matched + * we can be either pointing right afterthe ':' right before the value (that we need skip), + * or we may have consumed the value and we might be at a comma or at the + * final brace (ready for a call to has_next_field()). + */ + error_code error; + bool has_value; + + // First, we scan from that point to the end. + // If we don't find a match, we may loop back around, and scan from the beginning to that point. + token_position search_start = _json_iter->position(); + + // We want to know whether we need to go back to the beginning. + bool at_first = at_first_field(); + /////////////// + // Initially, the object can be in one of a few different places: + // + // 1. At the first key: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + // + if (at_first) { + has_value = true; + + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { + +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + SIMDJSON_TRY(reset_object().get(has_value)); + at_first = true; + // 3. 
When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + // If someone queried a key but they not did access the value, then we are left pointing + // at the ':' and we need to move forward through the value... If the value was + // processed then skip_child() does not move the iterator (but may adjust the depth). + if ((error = skip_child() )) { abandon(); return error; } + search_start = _json_iter->position(); + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + + // After initial processing, we will be in one of two states: + // + // ``` + // // At the beginning of a field + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // At the end of the object + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // ``` + // + // Next, we find a match starting from the current position. + while (has_value) { + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). 
+ if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + // Performance note: it maybe wasteful to rewind to the beginning when there might be + // no other query following. Indeed, it would require reskipping the whole object. + // Instead, you can just stay where you are. If there is a new query, there is always time + // to rewind. + if(at_first) { return false; } + + // If we reach the end without finding a match, search the rest of the fields starting at the + // beginning of the object. 
+ // (We have already run through the object before, so we've already validated its structure. We + // don't check errors in this bit.) + SIMDJSON_TRY(reset_object().get(has_value)); + while (true) { + SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + error = field_value(); SIMDJSON_ASSUME(!error); + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). 
+ SIMDJSON_TRY( skip_child() ); + // If we reached the end of the key-value pair we started from, then we know + // that the key is not there so we return false. We are either right before + // the next comma or the final brace. + if(_json_iter->position() == search_start) { return false; } + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); + // If we make the mistake of exiting here, then we could be left pointing at a key + // in the middle of an object. That's not an allowable state. + } + // If the loop ended, we're out of fields to look at. The program should + // never reach this point. + return false; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { + assert_at_next(); + + const uint8_t *key = _json_iter->return_current_and_advance(); + if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } + return raw_json_string(key); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { + assert_at_next(); + + if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } + _json_iter->descend_to(depth()+1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_root_array(); +} + +inline std::string value_iterator::to_string() const noexcept { + auto answer = 
std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); + if(_json_iter != nullptr) { answer += _json_iter->to_string(); } + answer += std::string(" ]"); + return answer; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { + assert_at_container_start(); + if (*_json_iter->peek() == ']') { + logger::log_value(*_json_iter, "empty array"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY( end_container() ); + return false; + } + _json_iter->descend_to(depth()+1); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should + // also call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != ']') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); + } + // If the last character is ] *and* the first gibberish character is also ']' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. 
+ if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed array. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { + auto error = check_root_array(); + if (error) { return error; } + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { + assert_at_next(); + + logger::log_event(*this, "has_next_element"); + switch (*_json_iter->return_current_and_advance()) { + case ']': + logger::log_end_value(*_json_iter, "array"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + _json_iter->descend_to(depth()+1); + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between array elements"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { + auto not_true = atomparsing::str4ncmp(json, "true"); + auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); + bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 
5 : 4]); + if (error) { return incorrect_type_error("Not a boolean"); } + return simdjson_result(!not_true); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { + bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); + // if we start with 'n', we must be a null + if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } + return is_null_string; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { + return get_raw_json_string().unescape(json_iter(), allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_string(allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { + return get_raw_json_string().unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { + auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { + auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return 
result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { + auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { + auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { + auto result = numberparsing::parse_double(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { + auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { + auto result = parse_bool(peek_non_root_scalar("bool")); + if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } + return result; +} +simdjson_inline simdjson_result value_iterator::is_null() noexcept { + bool is_null_value; + SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); + if(is_null_value) { advance_non_root_scalar("null"); } + return is_null_value; +} +simdjson_inline bool value_iterator::is_negative() noexcept { + return numberparsing::is_negative(peek_non_root_scalar("numbersign")); +} +simdjson_inline bool value_iterator::is_root_negative() noexcept { + return numberparsing::is_negative(peek_root_scalar("numbersign")); +} +simdjson_inline simdjson_result value_iterator::is_integer() noexcept { + return 
numberparsing::is_integer(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { + return numberparsing::get_number_type(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number() noexcept { + number num; + error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); + if(error) { return error; } + return num; +} + +simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("is_root_integer"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + return false; // if there are more than 20 characters, it cannot be represented as an integer. + } + auto answer = numberparsing::is_integer(tmpbuf); + // If the parsing was a success, we must still check that it is + // a single scalar. Note that we parse first because of cases like '[]' where + // getting TRAILING_CONTENT is wrong. + if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } + return answer; +} + +simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return number_type::big_integer; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; + } + auto answer = numberparsing::get_number_type(tmpbuf); + if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + return answer; +} +simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return BIGINT_ERROR; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; + } + number num; + error_code error = numberparsing::parse_number(tmpbuf, num); + if(error) { return error; } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("number"); + return num; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { + return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_root_string(check_trailing, allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { + return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) 
noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("bool"); + uint8_t tmpbuf[5+1+1]; // +1 for null termination + tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } + auto result = parse_bool(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + } + return result; +} +simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("null"); + bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + if(result) { // we have something that looks like a null. + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("null"); + } + return result; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); + + return _json_iter->skip_child(depth()); +} + +simdjson_inline value_iterator value_iterator::child() const noexcept { + assert_at_child(); + return { _json_iter, depth()+1, _json_iter->token.position() }; +} + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is +// marked non-inline. 
+SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline bool value_iterator::is_open() const noexcept { + return _json_iter->depth() >= depth(); +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool value_iterator::at_end() const noexcept { + return _json_iter->at_end(); +} + +simdjson_inline bool value_iterator::at_start() const noexcept { + return _json_iter->token.position() == start_position(); +} + +simdjson_inline bool value_iterator::at_first_field() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + return _json_iter->token.position() == start_position() + 1; +} + +simdjson_inline void value_iterator::abandon() noexcept { + _json_iter->abandon(); +} + +simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { + return _depth; +} +simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { + return _json_iter->error; +} +simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { + return _json_iter->string_buf_loc(); +} +simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { + return *_json_iter; +} +simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { + return *_json_iter; +} + +simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { + return _json_iter->peek(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { + return _json_iter->peek_length(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { + return _json_iter->peek_root_length(start_position()); +} + +simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. 
+ if (!is_at_start()) { return peek_start(); } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return; } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { + logger::log_start_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + const uint8_t *json; + if (!is_at_start()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + json = peek_start(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + } else { + assert_at_start(); + /** + * We should be prudent. Let us peek. If it is not the right type, we + * return an error. Only once we have determined that we have the right + * type are we allowed to advance! 
+ */ + json = _json_iter->peek(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + _json_iter->return_current_and_advance(); + } + + + return SUCCESS; +} + + +simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_root(); + return _json_iter->peek(); +} +simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_non_root_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_root(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} +simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_non_root_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { + logger::log_error(*_json_iter, start_position(), depth(), message); + return INCORRECT_TYPE; +} + +simdjson_inline bool value_iterator::is_at_start() const noexcept { + return position() == start_position(); +} + +simdjson_inline bool value_iterator::is_at_key() const noexcept { + // Keys are at the same depth as the object. + // Note here that we could be safer and check that we are within an object, + // but we do not. 
+ return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; +} + +simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { + // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). + auto delta = position() - start_position(); + return delta == 1 || delta == 2; +} + +inline void value_iterator::assert_at_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_container_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_next() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +simdjson_inline void value_iterator::move_at_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position); +} + +simdjson_inline void value_iterator::move_at_container_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position + 1); +} + +simdjson_inline simdjson_result value_iterator::reset_array() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_array(); +} + +simdjson_inline simdjson_result value_iterator::reset_object() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_object(); +} + +inline void value_iterator::assert_at_child() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_root() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( 
_depth == 1 ); +} + +inline void value_iterator::assert_at_non_root_start() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth > 1 ); +} + +inline void value_iterator::assert_is_valid() const noexcept { + SIMDJSON_ASSUME( _json_iter != nullptr ); +} + +simdjson_inline bool value_iterator::is_valid() const noexcept { + return _json_iter != nullptr; +} + +simdjson_inline simdjson_result value_iterator::type() const noexcept { + switch (*peek_start()) { + case '{': + return json_type::object; + case '[': + return json_type::array; + case '"': + return json_type::string; + case 'n': + return json_type::null; + case 't': case 'f': + return json_type::boolean; + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return json_type::number; + default: + return TAPE_ERROR; + } +} + +simdjson_inline token_position value_iterator::start_position() const noexcept { + return _start_position; +} + +simdjson_inline token_position value_iterator::position() const noexcept { + return _json_iter->position(); +} + +simdjson_inline token_position value_iterator::end_position() const noexcept { + return _json_iter->end_position(); +} + +simdjson_inline token_position value_iterator::last_position() const noexcept { + return _json_iter->last_position(); +} + +simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { + return _json_iter->report_error(error, message); +} + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +/* end file 
simdjson/generic/ondemand/value_iterator-inl.h for haswell */ +/* end file simdjson/generic/ondemand/amalgamated.h for haswell */ +/* including simdjson/haswell/end.h: #include "simdjson/haswell/end.h" */ +/* begin file simdjson/haswell/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL +SIMDJSON_UNTARGET_REGION +#endif + +/* undefining SIMDJSON_IMPLEMENTATION from "haswell" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/haswell/end.h */ + +#endif // SIMDJSON_HASWELL_ONDEMAND_H +/* end file simdjson/haswell/ondemand.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(icelake) +/* including simdjson/icelake/ondemand.h: #include "simdjson/icelake/ondemand.h" */ +/* begin file simdjson/icelake/ondemand.h */ +#ifndef SIMDJSON_ICELAKE_ONDEMAND_H +#define SIMDJSON_ICELAKE_ONDEMAND_H + +/* including simdjson/icelake/begin.h: #include "simdjson/icelake/begin.h" */ +/* begin file simdjson/icelake/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "icelake" */ +#define SIMDJSON_IMPLEMENTATION icelake +/* including simdjson/icelake/base.h: #include "simdjson/icelake/base.h" */ +/* begin file simdjson/icelake/base.h */ +#ifndef SIMDJSON_ICELAKE_BASE_H +#define SIMDJSON_ICELAKE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE +namespace simdjson { +/** + * Implementation for Icelake (Intel AVX512). 
+ */ +namespace icelake { + +class implementation; + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BASE_H +/* end file simdjson/icelake/base.h */ +/* including simdjson/icelake/intrinsics.h: #include "simdjson/icelake/intrinsics.h" */ +/* begin file simdjson/icelake/intrinsics.h */ +#ifndef SIMDJSON_ICELAKE_INTRINSICS_H +#define SIMDJSON_ICELAKE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdjson, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. 
+ */ +#include // for _blsr_u64 +#include // for __lzcnt64 +#include // for most things (AVX2, AVX512, _popcnt64) +#include +#include +#include +#include +#include // for _mm_clmulepi64_si128 +// Important: we need the AVX-512 headers: +#include +#include +#include +#include +#include +#include +#include +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. +#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO + +static_assert(sizeof(__m512i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for icelake"); + +#endif // SIMDJSON_ICELAKE_INTRINSICS_H +/* end file simdjson/icelake/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +SIMDJSON_TARGET_REGION("avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2,avx512vl,avx2,bmi,pclmul,lzcnt,popcnt") +#endif + +/* including simdjson/icelake/bitmanipulation.h: #include "simdjson/icelake/bitmanipulation.h" */ +/* begin file simdjson/icelake/bitmanipulation.h */ +#ifndef SIMDJSON_ICELAKE_BITMANIPULATION_H +#define SIMDJSON_ICELAKE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. 
+// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? + //////// + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return _blsr_u64(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return int(_lzcnt_u64(input_num)); +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BITMANIPULATION_H +/* end file simdjson/icelake/bitmanipulation.h */ +/* including simdjson/icelake/bitmask.h: #include "simdjson/icelake/bitmask.h" */ +/* begin file simdjson/icelake/bitmask.h */ +#ifndef SIMDJSON_ICELAKE_BITMASK_H +#define SIMDJSON_ICELAKE_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include 
"simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BITMASK_H +/* end file simdjson/icelake/bitmask.h */ +/* including simdjson/icelake/simd.h: #include "simdjson/icelake/simd.h" */ +/* begin file simdjson/icelake/simd.h */ +#ifndef SIMDJSON_ICELAKE_SIMD_H +#define SIMDJSON_ICELAKE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if defined(__GNUC__) && !defined(__clang__) +#if __GNUC__ == 8 +#define SIMDJSON_GCC8 1 +#endif // __GNUC__ == 8 +#endif // defined(__GNUC__) && !defined(__clang__) + +#if SIMDJSON_GCC8 +/** + * GCC 8 fails to provide _mm512_set_epi8. We roll our own. 
+ */ +inline __m512i _mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) { + return _mm512_set_epi64(uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) + (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) + (uint64_t(a1) << 48) + (uint64_t(a0) << 56), + uint64_t(a15) + (uint64_t(a14) << 8) + (uint64_t(a13) << 16) + (uint64_t(a12) << 24) + (uint64_t(a11) << 32) + (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56), + uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) + (uint64_t(a20) << 24) + (uint64_t(a19) << 32) + (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56), + uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) + (uint64_t(a28) << 24) + (uint64_t(a27) << 32) + (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56), + uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) + (uint64_t(a36) << 24) + (uint64_t(a35) << 32) + (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56), + uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) + (uint64_t(a44) << 24) + (uint64_t(a43) << 32) + (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56), + uint64_t(a55) + 
(uint64_t(a54) << 8) + (uint64_t(a53) << 16) + (uint64_t(a52) << 24) + (uint64_t(a51) << 32) + (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56), + uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) + (uint64_t(a60) << 24) + (uint64_t(a59) << 32) + (uint64_t(a58) << 40) + (uint64_t(a57) << 48) + (uint64_t(a56) << 56)); +} +#endif // SIMDJSON_GCC8 + + + +namespace simdjson { +namespace icelake { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m512i value; + + // Zero constructor + simdjson_inline base() : value{__m512i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m512i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m512i&() const { return this->value; } + simdjson_inline operator __m512i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm512_or_si512(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm512_and_si512(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm512_xor_si512(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm512_andnot_si512(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. 
+ template + struct simd8; + + template> + struct base8: base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m512i _value) : base>(_value) {} + + friend simdjson_really_inline uint64_t operator==(const simd8 lhs, const simd8 rhs) { + return _mm512_cmpeq_epi8_mask(lhs, rhs); + } + + static const int SIZE = sizeof(base::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + // workaround for compilers unable to figure out that 16 - N is a constant (GCC 8) + constexpr int shift = 16 - N; + return _mm512_alignr_epi8(*this, _mm512_permutex2var_epi64(prev_chunk, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), *this), shift); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm512_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m512i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + simdjson_inline bool any() const { return !!_mm512_test_epi8_mask (*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm512_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm512_setzero_si512(); } + static simdjson_inline simd8 load(const T values[64]) { + return _mm512_loadu_si512(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, 
v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m512i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[64]) const { return _mm512_storeu_si512(reinterpret_cast<__m512i *>(dst), *this); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm512_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm512_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm512_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. 
+ template + simdjson_inline void compress(uint64_t mask, L * output) const { + _mm512_mask_compressstoreu_epi8 (output,~mask,*this); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[64]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31, + int8_t v32, int8_t v33, int8_t v34, int8_t v35, int8_t v36, int8_t v37, int8_t v38, int8_t v39, + int8_t v40, int8_t v41, int8_t v42, int8_t v43, int8_t v44, int8_t v45, int8_t v46, int8_t v47, + int8_t v48, int8_t v49, int8_t v50, int8_t v51, int8_t v52, int8_t v53, int8_t v54, int8_t v55, + int8_t v56, int8_t v57, int8_t v58, int8_t v59, int8_t v60, int8_t v61, int8_t v62, int8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, 
v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 + )) {} + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epi8(*this, other); } + + simdjson_inline simd8 operator>(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(*this, other),_mm512_set1_epi8(uint8_t(0x80))); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(other, *this),_mm512_set1_epi8(uint8_t(0x80))); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[64]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, 
uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31, + uint8_t v32, uint8_t v33, uint8_t v34, uint8_t v35, uint8_t v36, uint8_t v37, uint8_t v38, uint8_t v39, + uint8_t v40, uint8_t v41, uint8_t v42, uint8_t v43, uint8_t v44, uint8_t v45, uint8_t v46, uint8_t v47, + uint8_t v48, uint8_t v49, uint8_t v50, uint8_t v51, uint8_t v52, uint8_t v53, uint8_t v54, uint8_t v55, + uint8_t v56, uint8_t v57, uint8_t v58, uint8_t v59, uint8_t v60, uint8_t v61, uint8_t v62, uint8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 + )) {} + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm512_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm512_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epu8(*this, 
other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epu8(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline uint64_t operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline uint64_t operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return _mm512_mask_blend_epi8(*this == uint8_t(0), _mm512_set1_epi8(0), _mm512_set1_epi8(-1)); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + + simdjson_inline bool is_ascii() const { return _mm512_movepi8_mask(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { + return !_mm512_test_epi8_mask(*this, *this); + } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return !_mm512_test_epi8_mask(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm512_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return 
simd8(_mm512_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. value.get_bit<7>() gets the high bit + template + simdjson_inline uint64_t get_bit() const { return _mm512_movepi8_mask(_mm512_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 1, "Icelake kernel should use one register per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const simd8 chunk0) : chunks{chunk0} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(mask, output); + return 64 - count_ones(mask); + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + } + + simdjson_inline simd8 reduce_or() const { + return this->chunks[0]; + } + + simdjson_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] | mask + ); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return this->chunks[0] == mask; + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return this->chunks[0] == other.chunks[0]; + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return this->chunks[0] <= mask; + } + }; // struct simd8x64 + +} // namespace simd + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_SIMD_H +/* end file simdjson/icelake/simd.h */ +/* including 
simdjson/icelake/stringparsing_defs.h: #include "simdjson/icelake/stringparsing_defs.h" */ +/* begin file simdjson/icelake/stringparsing_defs.h */ +#ifndef SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H +#define SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 64; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint64_t bs_bits; + uint64_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 63 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + // store to dest unconditionally - we can overwrite the bits we don't like later + v.store(dst); + return { + static_cast(v == '\\'), // bs_bits + static_cast(v == '"'), // quote_bits + }; +} + +} // unnamed namespace +} // namespace icelake +} // namespace
simdjson + +#endif // SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H +/* end file simdjson/icelake/stringparsing_defs.h */ +/* including simdjson/icelake/numberparsing_defs.h: #include "simdjson/icelake/numberparsing_defs.h" */ +/* begin file simdjson/icelake/numberparsing_defs.h */ +#ifndef SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H +#define SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace numberparsing { + +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. 
+ const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emulate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace icelake +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H +/* end file simdjson/icelake/numberparsing_defs.h */ +/* end file simdjson/icelake/begin.h */ +/* including simdjson/generic/ondemand/amalgamated.h for icelake: #include "simdjson/generic/ondemand/amalgamated.h" */ +/* begin file simdjson/generic/ondemand/amalgamated.h for icelake */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H) +#error
simdjson/generic/ondemand/dependencies.h must be included before simdjson/generic/ondemand/amalgamated.h! +#endif + +// Stuff other things depend on +/* including simdjson/generic/ondemand/base.h for icelake: #include "simdjson/generic/ondemand/base.h" */ +/* begin file simdjson/generic/ondemand/base.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +/** + * A fast, simple, DOM-like interface that parses JSON as you use it. + * + * Designed for maximum speed and a lower memory profile. + */ +namespace ondemand { + +/** Represents the depth of a JSON value (number of nested arrays/objects). */ +using depth_t = int32_t; + +/** @copydoc simdjson::icelake::number_type */ +using number_type = simdjson::icelake::number_type; + +/** @private Position in the JSON buffer indexes */ +using token_position = const uint32_t *; + +class array; +class array_iterator; +class document; +class document_reference; +class document_stream; +class field; +class json_iterator; +enum class json_type; +struct number; +class object; +class object_iterator; +class parser; +class raw_json_string; +class token_iterator; +class value; +class value_iterator; + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_BASE_H +/* end file simdjson/generic/ondemand/base.h for icelake */ +/* including simdjson/generic/ondemand/value_iterator.h for icelake: #include "simdjson/generic/ondemand/value_iterator.h" */ +/* begin file simdjson/generic/ondemand/value_iterator.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H + +/* amalgamation skipped 
(editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +/** + * Iterates through a single JSON value at a particular depth. + * + * Does not keep track of the type of value: provides methods for objects, arrays and scalars and expects + * the caller to call the right ones. + * + * @private This is not intended for external use. + */ +class value_iterator { +protected: + /** The underlying JSON iterator */ + json_iterator *_json_iter{}; + /** The depth of this value */ + depth_t _depth{}; + /** + * The starting token index for this value + */ + token_position _start_position{}; + +public: + simdjson_inline value_iterator() noexcept = default; + + /** + * Denote that we're starting a document. + */ + simdjson_inline void start_document() noexcept; + + /** + * Skips a non-iterated or partially-iterated JSON value, whether it is a scalar, array or object. + * + * Optimized for scalars. + */ + simdjson_warn_unused simdjson_inline error_code skip_child() noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is at the start of the value + */ + simdjson_inline bool at_start() const noexcept; + + /** + * Tell whether the value is open--if the value has not been used, or the array/object is still open. + */ + simdjson_inline bool is_open() const noexcept; + + /** + * Tell whether the value is at an object's first field (just after the {). + */ + simdjson_inline bool at_first_field() const noexcept; + + /** + * Abandon all iteration. 
+ */ + simdjson_inline void abandon() noexcept; + + /** + * Get the child value as a value_iterator. + */ + simdjson_inline value_iterator child_value() const noexcept; + + /** + * Get the depth of this value. + */ + simdjson_inline int32_t depth() const noexcept; + + /** + * Get the JSON type of this value. + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() const noexcept; + + /** + * @addtogroup object Object iteration + * + * Methods to iterate and find object fields. These methods generally *assume* the value is + * actually an object; the caller is responsible for keeping track of that fact. + * + * @{ + */ + + /** + * Start an object iteration. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + */ + simdjson_warn_unused simdjson_inline simdjson_result start_object() noexcept; + /** + * Start an object iteration from the root. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_object() noexcept; + /** + * Checks whether an object could be started from the root. May be called by start_root_object. + * + * @returns SUCCESS if it is possible to safely start an object from the root (document level). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_object() noexcept; + /** + * Start an object iteration after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). 
+ * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; + /** + * Start an object iteration from the root, after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; + + /** + * Moves to the next field in an object. + * + * Looks for , and }. If } is found, the object is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return whether there is another field in the object. + * @error TAPE_ERROR If there is a comma missing between fields. + * @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_field() noexcept; + + /** + * Get the current field's key. + */ + simdjson_warn_unused simdjson_inline simdjson_result field_key() noexcept; + + /** + * Pass the : in the field and move to its value. + */ + simdjson_warn_unused simdjson_inline error_code field_value() noexcept; + + /** + * Find the next field with the given key. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). 
+ */ + simdjson_warn_unused simdjson_inline error_code find_field(const std::string_view key) noexcept; + + /** + * Find the next field with the given key, *without* unescaping. This assumes object order: it + * will not find the field if it was already passed when looking for some *other* field. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_raw(const std::string_view key) noexcept; + + /** + * Find the field with the given key without regard to order, and *without* unescaping. + * + * This is an unordered object lookup: if the field is not found initially, it will cycle around and scan from the beginning. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept; + + /** @} */ + + /** + * @addtogroup array Array iteration + * Methods to iterate over array elements. These methods generally *assume* the value is actually + * an array; the caller is responsible for keeping track of that fact. + * @{ + */ + + /** + * Check for an opening [ and start an array iteration.
+ * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + */ + simdjson_warn_unused simdjson_inline simdjson_result start_array() noexcept; + /** + * Check for an opening [ and start an array iteration while at the root. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_array() noexcept; + /** + * Checks whether an array could be started from the root. May be called by start_root_array. + * + * @returns SUCCESS if it is possible to safely start an array from the root (document level). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_array() noexcept; + /** + * Start an array iteration, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; + /** + * Start an array iteration from the root, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; + + /** + * Moves to the next element in an array. + * + * Looks for , and ]. 
If ] is found, the array is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return Whether there is another element in the array. + * @error TAPE_ERROR If there is a comma missing between elements. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_element() noexcept; + + /** + * Get a child value iterator. + */ + simdjson_warn_unused simdjson_inline value_iterator child() const noexcept; + + /** @} */ + + /** + * @defgroup scalar Scalar values + * @addtogroup scalar + * @{ + */ + + simdjson_warn_unused simdjson_inline simdjson_result get_string(bool allow_replacement) noexcept; + template + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_bool() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_null() noexcept; + simdjson_warn_unused simdjson_inline bool is_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_integer() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + simdjson_warn_unused simdjson_inline simdjson_result get_root_string(bool 
check_trailing, bool allow_replacement) noexcept; + template + simdjson_warn_unused simdjson_inline error_code get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_wobbly_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_raw_json_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_bool(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline bool is_root_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_integer(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number_type(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; + + simdjson_inline error_code error() const noexcept; + simdjson_inline uint8_t *&string_buf_loc() noexcept; + simdjson_inline const json_iterator &json_iter() const noexcept; + simdjson_inline json_iterator &json_iter() noexcept; + + simdjson_inline void assert_is_valid() const noexcept; + simdjson_inline bool is_valid() const noexcept; + + /** @} 
*/ +protected: + /** + * Restarts an array iteration. + * @returns Whether the array has any elements (returns false for empty). + */ + simdjson_inline simdjson_result reset_array() noexcept; + /** + * Restarts an object iteration. + * @returns Whether the object has any fields (returns false for empty). + */ + simdjson_inline simdjson_result reset_object() noexcept; + /** + * move_at_start(): moves us so that we are pointing at the beginning of + * the container. It updates the index so that at_start() is true and it + * syncs the depth. The user can then create a new container instance. + * + * Usage: used with value::count_elements(). + **/ + simdjson_inline void move_at_start() noexcept; + + /** + * move_at_container_start(): moves us so that we are pointing at the beginning of + * the container so that assert_at_container_start() passes. + * + * Usage: used with reset_array() and reset_object(). + **/ + simdjson_inline void move_at_container_start() noexcept; + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + simdjson_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept; + + simdjson_inline simdjson_result parse_null(const uint8_t *json) const noexcept; + simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; + simdjson_inline const uint8_t *peek_start() const noexcept; + simdjson_inline uint32_t peek_start_length() const noexcept; + simdjson_inline uint32_t peek_root_length() const noexcept; + + /** + * The general idea of the advance_... methods and the peek_* methods + * is that you first peek and check that you have desired type. If you do, + * and only if you do, then you advance. + * + * We used to unconditionally advance. But this made reasoning about our + * current state difficult. + * Suppose you always advance. Look at the 'value' matching the key + * "shadowable" in the following example... 
+ * + * ({"globals":{"a":{"shadowable":[}}}}) + * + * If the user thinks it is a Boolean and asks for it, then we check the '[', + * decide it is not a Boolean, but still move into the next character ('}'). Now + * we are left pointing at '}' right after a '['. And we have not yet reported + * an error, only that we do not have a Boolean. + * + * If, instead, you just stand your ground until it is content that you know, then + * you will only even move beyond the '[' if the user tells you that you have an + * array. So you will be at the '}' character inside the array and, hopefully, you + * will then catch the error because an array cannot start with '}', but the code + * processing Boolean values does not know this. + * + * So the contract is: first call 'peek_...' and then call 'advance_...' only + * if you have determined that it is a type you can handle. + * + * Unfortunately, it makes the code more verbose, longer and maybe more error prone. + */ + + simdjson_inline void advance_scalar(const char *type) noexcept; + simdjson_inline void advance_root_scalar(const char *type) noexcept; + simdjson_inline void advance_non_root_scalar(const char *type) noexcept; + + simdjson_inline const uint8_t *peek_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_root_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; + + + simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; + simdjson_inline error_code end_container() noexcept; + + /** + * Advance to a place expecting a value (increasing depth). + * + * @return The current token (the one left behind). + * @error TAPE_ERROR If the document ended early. 
+ */ + simdjson_inline simdjson_result advance_to_value() noexcept; + + simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; + simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; + + simdjson_inline bool is_at_start() const noexcept; + /** + * is_at_iterator_start() returns true on an array or object after it has just been + * created, whether the instance is empty or not. + * + * Usage: used by array::begin() in debug mode (SIMDJSON_DEVELOPMENT_CHECKS) + */ + simdjson_inline bool is_at_iterator_start() const noexcept; + + /** + * Assuming that we are within an object, this returns true if we + * are pointing at a key. + * + * Usage: the skip_child() method should never be used while we are pointing + * at a key inside an object. + */ + simdjson_inline bool is_at_key() const noexcept; + + inline void assert_at_start() const noexcept; + inline void assert_at_container_start() const noexcept; + inline void assert_at_root() const noexcept; + inline void assert_at_child() const noexcept; + inline void assert_at_next() const noexcept; + inline void assert_at_non_root_start() const noexcept; + + /** Get the starting position of this value */ + simdjson_inline token_position start_position() const noexcept; + + /** @copydoc error_code json_iterator::position() const noexcept; */ + simdjson_inline token_position position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position last_position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position end_position() const noexcept; + /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + friend class document; + friend class object; + friend class array; + friend class value; + friend 
class field; +}; // value_iterator + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(icelake::ondemand::value_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H +/* end file simdjson/generic/ondemand/value_iterator.h for icelake */ +/* including simdjson/generic/ondemand/value.h for icelake: #include "simdjson/generic/ondemand/value.h" */ +/* begin file simdjson/generic/ondemand/value.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +/** + * An ephemeral JSON value returned during iteration. It is only valid for as long as you do + * not access more data in the JSON document. + */ +class value { +public: + /** + * Create a new invalid value. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline value() noexcept = default; + + /** + * Get this value as the given type. 
+ * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_inline simdjson_result get() noexcept { + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_inline error_code get(T &out) noexcept; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() noexcept; + + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields.
+ * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() noexcept; + + /** + * Cast this JSON value to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + + /** + * Cast this JSON value (inside string) to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64() noexcept; + + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed.
+ * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + + /** + * Attempts to fill the provided std::string reference with the parsed value of the current string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. + * We recommend you avoid allocating an std::string unless you need to. + * + * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. + */ + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + + /** + * Cast this JSON value to a "wobbly" string. + * + * The string may not be a valid UTF-8 string. + * See https://simonsapin.github.io/wtf-8/ + * + * Important: a value should be consumed once. Calling get_wobbly_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false.
+ */ + simdjson_inline simdjson_result get_bool() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null. + * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.). + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() noexcept(false); + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer.
+ */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); +#endif + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + * + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. 
After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * Performance hint: You should only call count_elements() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method on the object instance. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; + /** + * Look up a field by name on an object (order-sensitive). 
+ * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. 
+ * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field as not there when they are not in order). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @return The type of JSON value (json_type::array, json_type::object, json_type::string, + * json_type::number, json_type::boolean, or json_type::null). + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the value is a scalar (string, number, null, Boolean). + * Returns false when there it is an array or object. 
+ * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + /** + * Checks whether the value is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; + + /** + * Checks whether the value is a negative number. + * + * @returns true if the number is negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the value is an integer number. Note that + * this requires partially parsing the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * Performance note: if you call this function systematically + * before parsing a number, you may have fallen for a performance + * anti-pattern. + * + * @returns true if the number is an integer. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808. + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808.
+ * get_number_type() is number_type::big_integer for integers that do not fit in 64 bits, + * in which case the digit_count is set to the length of the big integer string. + * Otherwise, get_number_type() has value number_type::floating_point_number. + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it is does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number. An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * For integers that do not fit in 64 bits, the function returns BIGINT_ERROR error code. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + * + * Performance note: this is designed with performance in mind. 
When + * calling 'get_number()', you scan the number string only once, determining + * efficiently the type and storing it in an efficient manner. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. However, if this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view is guaranteed to be + * a non-space token. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". + * - -1.2e-100 + * - true + * - false + * - null + * + * See also value::raw_json(). + */ + simdjson_inline std::string_view raw_json_token() noexcept; + + /** + * Get a string_view pointing at this value in the JSON document. + * If this element is an array or an object, it consumes the array or the object + * and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document. + * If this element is a scalar (string, number, Boolean, null), it returns what + * raw_json_token() would return. + */ + simdjson_inline simdjson_result raw_json() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + simdjson_inline simdjson_result current_location() noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. 
+ */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. + * + * Calling at_pointer() on non-document instances (e.g., arrays and objects) is not + * standardized (by RFC 6901). We provide some experimental support for JSON pointers + * on non-document instances. Yet it is not the case when calling at_pointer on an array + * or an object instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. 
+ * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that are trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view at_path) noexcept; + + +protected: + /** + * Create a value. + */ + simdjson_inline value(const value_iterator &iter) noexcept; + + /** + * Skip this value, allowing iteration to continue. + */ + simdjson_inline void skip() noexcept; + + /** + * Start a value at the current position. + * + * (It should already be started; this is just a self-documentation method.) + */ + static simdjson_inline value start(const value_iterator &iter) noexcept; + + /** + * Resume a value.
+ */ + static simdjson_inline value resume(const value_iterator &iter) noexcept; + + /** + * Get the object, starting or resuming it as necessary + */ + simdjson_inline simdjson_result start_or_resume_object() noexcept; + + // simdjson_inline void log_value(const char *type) const noexcept; + // simdjson_inline void log_error(const char *message) const noexcept; + + value_iterator iter{}; + + friend class document; + friend class array_iterator; + friend class field; + friend class object; + friend struct simdjson_result; + friend struct simdjson_result; + friend class field; +}; + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(icelake::ondemand::value &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result get_array() noexcept; + simdjson_inline simdjson_result get_object() noexcept; + + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() 
noexcept; + + template simdjson_inline error_code get(T &out) noexcept; + +#if SIMDJSON_EXCEPTIONS + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator icelake::ondemand::array() noexcept(false); + simdjson_inline operator icelake::ondemand::object() noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator icelake::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
+ */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field as not there when they are not in order). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
+ */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + */ + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + + /** @copydoc simdjson_inline std::string_view value::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + + /** @copydoc simdjson_inline simdjson_result current_location() noexcept */ + simdjson_inline simdjson_result current_location() noexcept; + /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + +} // namespace simdjson + +#endif 
// SIMDJSON_GENERIC_ONDEMAND_VALUE_H +/* end file simdjson/generic/ondemand/value.h for icelake */ +/* including simdjson/generic/ondemand/logger.h for icelake: #include "simdjson/generic/ondemand/logger.h" */ +/* begin file simdjson/generic/ondemand/logger.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +// Logging should be free unless SIMDJSON_VERBOSE_LOGGING is set. Importantly, it is critical +// that the call to the log functions be side-effect free. Thus, for example, you should not +// create temporary std::string instances. +namespace logger { + +/** Severity of a log line; passed as the `level` argument of log_line(). */ +enum class log_level : int32_t { + info = 0, + error = 1 +}; + +/** LOG_ENABLED is a compile-time constant: true only when SIMDJSON_VERBOSE_LOGGING evaluates to non-zero. */ +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + +// We do not want these functions to be 'really inlined' since real inlining is +// for performance purposes and if you are using the loggers, you do not care about +// performance (or should not). +static inline void log_headers() noexcept; +// If args are provided, title will be treated as format string +template +static inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +template +static inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&...
args) noexcept; +/** Logging helpers taking a json_iterator. Each comes either with an explicit token position and depth, or with `delta` and `depth_delta` arguments that have defaults. */ +static inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; + +static inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail="") noexcept; +static inline void log_error(const json_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +/** The same helpers, taking a value_iterator instead of a json_iterator. */ +static inline void log_event(const value_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const value_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_error(const value_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +} // namespace logger +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_H +/* end file
simdjson/generic/ondemand/logger.h for icelake */ +/* including simdjson/generic/ondemand/token_iterator.h for icelake: #include "simdjson/generic/ondemand/token_iterator.h" */ +/* begin file simdjson/generic/ondemand/token_iterator.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +/** + * Iterates through JSON tokens (`{` `}` `[` `]` `,` `:` `""` `123` `true` `false` `null`) + * detected by stage 1. + * + * @private This is not intended for external use. + */ +class token_iterator { +public: + /** + * Create a new invalid token_iterator. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline token_iterator() noexcept = default; + /** Move and copy construction and assignment are all defaulted. */ + simdjson_inline token_iterator(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator &operator=(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator(const token_iterator &other) noexcept = default; + simdjson_inline token_iterator &operator=(const token_iterator &other) noexcept = default; + + /** + * Advance to the next token (returning the current one). + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + /** + * Reports the current offset in bytes from the start of the underlying buffer.
+ */ + simdjson_inline uint32_t current_offset() const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. + * + */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a root token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token (start of the document). + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; + /** + * Return the current index. + */ + simdjson_inline token_position position() const noexcept; + /** + * Reset to a previously saved index.
+ */ + simdjson_inline void set_position(token_position target_position) noexcept; + + // NOTE: we don't support a full C++ iterator interface, because we expect people to make + // different calls to advance the iterator based on *their own* state. + + /** Comparisons against another token_iterator. NOTE(review): presumably these compare the stored token positions - confirm in the implementation. */ + simdjson_inline bool operator==(const token_iterator &other) const noexcept; + simdjson_inline bool operator!=(const token_iterator &other) const noexcept; + simdjson_inline bool operator>(const token_iterator &other) const noexcept; + simdjson_inline bool operator>=(const token_iterator &other) const noexcept; + simdjson_inline bool operator<(const token_iterator &other) const noexcept; + simdjson_inline bool operator<=(const token_iterator &other) const noexcept; + +protected: + /** Construct from a buffer pointer and a starting token position (only reachable by this class and its friends). */ + simdjson_inline token_iterator(const uint8_t *buf, token_position position) noexcept; + + /** + * Get the index of the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_index(int32_t delta=0) const noexcept; + /** + * Get the index of the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. + * + */ + simdjson_inline uint32_t peek_index(token_position position) const noexcept; + + /** State: both members are value-initialized by default. NOTE(review): `buf` is presumably the start of the JSON input that token offsets refer to - confirm. */ + const uint8_t *buf{}; + token_position _position{}; + + /** Friends: the On-Demand iterator and object classes, plus both logger::log_line() overloads. */ + friend class json_iterator; + friend class value_iterator; + friend class object; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&...
args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +}; + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +/** Result wrapper constructible from either a token_iterator value or an error_code. */ +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(icelake::ondemand::token_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H +/* end file simdjson/generic/ondemand/token_iterator.h for icelake */ +/* including simdjson/generic/ondemand/json_iterator.h for icelake: #include "simdjson/generic/ondemand/json_iterator.h" */ +/* begin file simdjson/generic/ondemand/json_iterator.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +/** + * Iterates through JSON tokens, keeping track of depth and string buffer. + * + * @private This is not intended for external use.
+ */ +class json_iterator { +protected: + /** Token cursor (a token_iterator) and a pointer to an ondemand::parser; both are value-initialized by default. NOTE(review): `parser` is presumably non-owning - confirm. */ + token_iterator token{}; + ondemand::parser *parser{}; + /** + * Next free location in the string buffer. + * + * Used by raw_json_string::unescape() to have a place to unescape strings to. + */ + uint8_t *_string_buf_loc{}; + /** + * JSON error, if there is one. + * + * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. + * + * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first + * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If + * this is not elided, we should make sure it's at least not using up a register. Failing that, + * we should store it in document so there's only one of them. + */ + error_code error{SUCCESS}; + /** + * Depth of the current token in the JSON. + * + * - 0 = finished with document + * - 1 = document root value (could be [ or {, not yet known) + * - 2 = , or } inside root array/object + * - 3 = key or value inside root array/object. + */ + depth_t _depth{}; + /** + * Beginning of the document indexes. + * Normally we have root == parser->implementation->structural_indexes.get() + * but this may differ, especially in streaming mode (where we have several + * documents); + */ + token_position _root{}; + /** + * Normally, a json_iterator operates over a single document, but in + * some cases, we may have a stream of documents. This attribute is meant + * as meta-data: the json_iterator works the same irrespective of the + * value of this attribute.
+ */ + bool _streaming{false}; + +public: + simdjson_inline json_iterator() noexcept = default; + /** The move operations are user-declared (defined elsewhere); the copy operations are defaulted and the copy constructor is explicit. */ + simdjson_inline json_iterator(json_iterator &&other) noexcept; + simdjson_inline json_iterator &operator=(json_iterator &&other) noexcept; + simdjson_inline explicit json_iterator(const json_iterator &other) noexcept = default; + simdjson_inline json_iterator &operator=(const json_iterator &other) noexcept = default; + /** + * Skips a JSON value, whether it is a scalar, array or object. + */ + simdjson_warn_unused simdjson_inline error_code skip_child(depth_t parent_depth) noexcept; + + /** + * Tell whether the iterator is still at the start + */ + simdjson_inline bool at_root() const noexcept; + + /** + * Tell whether we should be expected to run in streaming + * mode (iterating over many documents). It is pure metadata + * that does not affect how the iterator works. It is used by + * start_root_array() and start_root_object(). + */ + simdjson_inline bool streaming() const noexcept; + + /** + * Get the root position (returned as a token_position). + */ + simdjson_inline token_position root_position() const noexcept; + /** + * Assert that we are at the document depth (== 1) + */ + simdjson_inline void assert_at_document_depth() const noexcept; + /** + * Assert that we are at the root of the document + */ + simdjson_inline void assert_at_root() const noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is live (has not been moved). + */ + simdjson_inline bool is_alive() const noexcept; + + /** + * Abandon this iterator, setting depth to 0 (as if the document is finished). + */ + simdjson_inline void abandon() noexcept; + + /** + * Advance the current token without modifying depth. + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + + /** + * Returns true if there is a single token in the index (i.e., it is + * a JSON with a scalar value such as a single number).
+ * + * @return whether there is a single token + */ + simdjson_inline bool is_single_token() const noexcept; + + /** + * Assert that there are at least the given number of tokens left. + * + * Has no effect in release builds. + */ + simdjson_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; + /** + * Assert that the given position addresses an actual token (is within bounds). + * + * Has no effect in release builds. + */ + simdjson_inline void assert_valid_position(token_position position) const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + /** + * Get a pointer to the current location in the input buffer. + * + * This is not null-terminated; it is a view into the JSON. + * + * You may be pointing outside of the input buffer: it is not generally + * safe to dereference this pointer. + */ + simdjson_inline const uint8_t *unsafe_pointer() const noexcept; + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token to retrieve. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ...
+ */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the token at the given position. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the root token at the given position. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; + /** + * Get the JSON text for the last token in the document. + * + * This is not null-terminated; it is a view into the JSON. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ... + */ + simdjson_inline const uint8_t *peek_last() const noexcept; + + /** + * Ascend one level. + * + * Validates that the depth - 1 == parent_depth. + * + * @param parent_depth the expected parent depth. + */ + simdjson_inline void ascend_to(depth_t parent_depth) noexcept; + + /** + * Descend one level. + * + * Validates that the new depth == child_depth. + * + * @param child_depth the expected child depth. + */ + simdjson_inline void descend_to(depth_t child_depth) noexcept; + /** Overload of descend_to() taking an extra `delta`. NOTE(review): the meaning of `delta` is not visible from this declaration - confirm in the implementation. */ + simdjson_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; + + /** + * Get current depth. + */ + simdjson_inline depth_t depth() const noexcept; + + /** + * Get current (writeable) location in the string buffer. + */ + simdjson_inline uint8_t *&string_buf_loc() noexcept; + + /** + * Report an unrecoverable error, preventing further iteration. + * + * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error.
+ */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + /** + * Log error, but don't stop iteration. + * @param error The error to report. Must be INCORRECT_TYPE or NO_SUCH_FIELD. + * @param message An error message to report with the error. + */ + simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; + + /** + * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with + * N bytes of capacity. It will return false if N is too small (smaller than max_len) or if it is zero. + * The buffer (tmpbuf) is padded with space characters. + */ + simdjson_warn_unused simdjson_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept; + + /** NOTE(review): presumably the current token position, mirroring token_iterator::position() - confirm. */ + simdjson_inline token_position position() const noexcept; + /** + * Write the raw_json_string to the string buffer and return a string_view. + * Each raw_json_string should be unescaped once, or else the string buffer might + * overflow. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, bool allow_replacement) noexcept; + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in) noexcept; + + simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; + + simdjson_inline error_code consume_character(char c) noexcept; + /** The two start-position accessors below are only declared when SIMDJSON_DEVELOPMENT_CHECKS is enabled. */ +#if SIMDJSON_DEVELOPMENT_CHECKS + simdjson_inline token_position start_position(depth_t depth) const noexcept; + simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; +#endif + + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() const noexcept; + + /** + * Updates this json iterator so that it is back at the beginning of the document, + * as if it had just been created.
+ */ + inline void rewind() noexcept; + /** + * This checks whether the {,},[,] are balanced so that the document + * ends with proper zero depth. This requires scanning the whole document + * and it may be expensive. It is expected that it will be rarely called. + * It does not attempt to match { with } and [ with ]. + */ + inline bool balanced() const noexcept; +protected: + /** Protected constructors taking the input buffer and a parser; the three-argument form is only declared when SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON is defined. */ + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser, bool streaming) noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + /// The last token before the end + simdjson_inline token_position last_position() const noexcept; + /// The token *at* the end. This points at gibberish and should only be used for comparison. + simdjson_inline token_position end_position() const noexcept; + /// The end of the buffer. + simdjson_inline token_position end() const noexcept; + + friend class document; + friend class document_stream; + friend class object; + friend class array; + friend class value; + friend class raw_json_string; + friend class parser; + friend class value_iterator; + friend class field; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&...
args) noexcept; +}; // json_iterator + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +/** Result wrapper constructible from either a json_iterator value or an error_code. */ +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(icelake::ondemand::json_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H +/* end file simdjson/generic/ondemand/json_iterator.h for icelake */ +/* including simdjson/generic/ondemand/json_type.h for icelake: #include "simdjson/generic/ondemand/json_type.h" */ +/* begin file simdjson/generic/ondemand/json_type.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +/** + * The type of a JSON value. + */ +enum class json_type { + // Start at 1 to catch uninitialized / default values more easily + array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) + object, ///< A JSON object ( { "a": 1, "b": 2, ... } ) + number, ///< A JSON number ( 1 or -2.3 or 4.5e6 ...) + string, ///< A JSON string ( "a" or "hello world\n" ...) + boolean, ///< A JSON boolean (true or false) + null ///< A JSON null (null) +}; + +/** + * A type representing a JSON number.
+ * The design of the struct is deliberately straight-forward. All + * functions return standard values with no error check. + */ +struct number { + + /** + * return the automatically determined type of + * the number: number_type::floating_point_number, + * number_type::signed_integer or number_type::unsigned_integer. + * + * enum class number_type { + * floating_point_number=1, /// a binary64 number + * signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + * unsigned_integer /// a positive integer larger or equal to 1<<63 + * }; + */ + simdjson_inline ondemand::number_type get_number_type() const noexcept; + /** + * return true if the automatically determined type of + * the number is number_type::unsigned_integer. + */ + simdjson_inline bool is_uint64() const noexcept; + /** + * return the value as a uint64_t, only valid if is_uint64() is true. + */ + simdjson_inline uint64_t get_uint64() const noexcept; + /** Implicit conversion to uint64_t. NOTE(review): presumably yields the same value as get_uint64() - confirm in the implementation. */ + simdjson_inline operator uint64_t() const noexcept; + + /** + * return true if the automatically determined type of + * the number is number_type::signed_integer. + */ + simdjson_inline bool is_int64() const noexcept; + /** + * return the value as an int64_t, only valid if is_int64() is true. + */ + simdjson_inline int64_t get_int64() const noexcept; + /** Implicit conversion to int64_t. NOTE(review): presumably yields the same value as get_int64() - confirm in the implementation. */ + simdjson_inline operator int64_t() const noexcept; + + + /** + * return true if the automatically determined type of + * the number is number_type::floating_point_number. + */ + simdjson_inline bool is_double() const noexcept; + /** + * return the value as a double, only valid if is_double() is true. + */ + simdjson_inline double get_double() const noexcept; + /** Implicit conversion to double. NOTE(review): presumably yields the same value as get_double() - confirm in the implementation. */ + simdjson_inline operator double() const noexcept; + + /** + * Convert the number to a double. Though it always succeeds, the conversion + * may be lossy if the number cannot be represented exactly.
+ */ + simdjson_inline double as_double() const noexcept; + + +protected: + /** + * The next block of declarations is designed so that we can call the number parsing + * functions on a number type. They are protected and should never be used outside + * of the core simdjson library. + */ + friend class value_iterator; + template + friend error_code numberparsing::write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer); + template + friend error_code numberparsing::parse_number(const uint8_t *const src, W &writer); + /** Store a signed 64-bit value to the number. */ + simdjson_inline void append_s64(int64_t value) noexcept; + /** Store an unsigned 64-bit value to the number. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + /** Store a double value to the number. */ + simdjson_inline void append_double(double value) noexcept; + /** Specifies that the value is a double, but leaves it undefined. */ + simdjson_inline void skip_double() noexcept; + /** + * End of friend declarations. + */ + + /** + * Our attributes are a union type (size = 64 bits) + * followed by a type indicator. + * By default the payload is initialized with 0 and the type is number_type::signed_integer. + */ + union { + double floating_point_number; + int64_t signed_integer; + uint64_t unsigned_integer; + } payload{0}; + number_type type{number_type::signed_integer}; +}; + +/** + * Write the JSON type to the output stream + * + * @param out The output stream. + * @param type The json_type. + */ +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept; + +#if SIMDJSON_EXCEPTIONS +/** + * Send JSON type to an output stream. + * + * @param out The output stream. + * @param type The json_type. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown).
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false); +#endif + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +/** Result wrapper constructible from either a json_type value or an error_code. */ +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(icelake::ondemand::json_type &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H +/* end file simdjson/generic/ondemand/json_type.h for icelake */ +/* including simdjson/generic/ondemand/raw_json_string.h for icelake: #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +/** + * A string escaped per JSON rules, terminated with quote ("). They are used to represent + * unescaped keys inside JSON documents. + * + * (In other words, a pointer to the beginning of a string, just after the start quote, inside a + * JSON file.) + * + * This class is deliberately simplistic and has little functionality. You can + * compare a raw_json_string instance with an unescaped C string, but + * that is nearly all you can do.
+ * + * The raw_json_string is unescaped. If you wish to write an unescaped version of it to your own + * buffer, you may do so using the parser.unescape(string, buff) method, using an ondemand::parser + * instance. Doing so requires you to have a sufficiently large buffer. + * + * The raw_json_string instances originate typically from field instance which in turn represent + * key-value pairs from object instances. From a field instance, you get the raw_json_string + * instance by calling key(). You can, if you want a more usable string_view instance, call + * the unescaped_key() method on the field instance. You may also create a raw_json_string from + * any other string value, with the value.get_raw_json_string() method. Again, you can get + * a more usable string_view instance by calling get_string(). + * + */ +class raw_json_string { +public: + /** + * Create a new invalid raw_json_string. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline raw_json_string() noexcept = default; + + /** + * Create a new invalid raw_json_string pointed at the given location in the JSON. + * + * The given location must be just *after* the beginning quote (") in the JSON file. + * + * It *must* be terminated by a ", and be a valid JSON string. + */ + simdjson_inline raw_json_string(const uint8_t * _buf) noexcept; + /** + * Get the raw pointer to the beginning of the string in the JSON (just after the "). + * + * It is possible for this function to return a null pointer if the instance + * has outlived its existence. + */ + simdjson_inline const char * raw() const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done) on target.size() characters, + * and if the raw_json_string instance has a quote character at byte index target.size(). + * We never read more than length + 1 bytes in the raw_json_string instance. 
+ * If length is smaller than target.size(), this will return false. + * + * The std::string_view instance may contain any characters. However, the caller + * is responsible for setting length so that length bytes may be read in the + * raw_json_string. + * + * Performance: the comparison may be done using memcmp which may be efficient + * for long strings. + */ + simdjson_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The std::string_view instance should not contain unescaped quote characters: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * Performance: the comparison is done byte-by-byte which might be inefficient for + * long strings. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... + * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The provided C string should not contain an unescaped quote character: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... 
+ * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(const char* target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(const char* target) const noexcept; + + /** + * Returns true if target is free from unescaped quote. If target is known at + * compile-time, we might expect the computation to happen at compile time with + * many compilers (not all!). + */ + static simdjson_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; + static simdjson_inline bool is_free_from_unescaped_quote(const char* target) noexcept; + +private: + + + /** + * This will set the inner pointer to zero, effectively making + * this instance unusable. + */ + simdjson_inline void consume() noexcept { buf = nullptr; } + + /** + * Checks whether the inner pointer is non-null and thus usable. + */ + simdjson_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result will be a valid UTF-8. + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + * @param allow_replacement Whether we allow replacement of invalid surrogate pairs. 
+ */ + simdjson_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter, bool allow_replacement) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result may not be a valid UTF-8. https://simonsapin.github.io/wtf-8/ + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + */ + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(json_iterator &iter) const noexcept; + const uint8_t * buf{}; + friend class object; + friend class field; + friend class parser; + friend struct simdjson_result; +}; + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; + +/** + * Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible + * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings. 
+ */ +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept; +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept; + + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(icelake::ondemand::raw_json_string &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private + + simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape(icelake::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(icelake::ondemand::json_iterator &iter) const noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H +/* end file simdjson/generic/ondemand/raw_json_string.h for icelake */ +/* including simdjson/generic/ondemand/parser.h for icelake: #include "simdjson/generic/ondemand/parser.h" */ +/* begin file simdjson/generic/ondemand/parser.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace icelake { +namespace ondemand { + +/** + * The default batch size for document_stream instances for this On Demand kernel. + * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * in the future. + */ +static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; +/** + * Some adversary might try to set the batch size to 0 or 1, which might cause problems. + * We set a minimum of 32B since anything else is highly likely to be an error. In practice, + * most users will want a much larger batch size. + * + * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON + * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. + */ +static constexpr size_t MINIMAL_BATCH_SIZE = 32; + +/** + * A JSON fragment iterator. + * + * This holds the actual iterator as well as the buffer for writing strings. + */ +class parser { +public: + /** + * Create a JSON parser. + * + * The new parser will have zero capacity. + */ + inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; + + inline parser(parser &&other) noexcept = default; + simdjson_inline parser(const parser &other) = delete; + simdjson_inline parser &operator=(const parser &other) = delete; + simdjson_inline parser &operator=(parser &&other) noexcept = default; + + /** Deallocate the JSON parser. */ + inline ~parser() noexcept = default; + + /** + * Start iterating an on-demand JSON document. + * + * ondemand::parser parser; + * document doc = parser.iterate(json); + * + * It is expected that the content is a valid UTF-8 file, containing a valid JSON document. + * Otherwise the iterate method may return an error. 
In particular, the whole input should be + * valid: we do not attempt to tolerate incorrect content either before or after a JSON + * document. If there is a UTF-8 BOM, the parser skips it. + * + * ### IMPORTANT: Validate what you use + * + * Calling iterate on an invalid JSON document may not immediately trigger an error. The call to + * iterate does not parse and validate the whole document. + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call iterate() again or destroy the parser. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you are + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * @param len The length of the JSON. + * @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). + * + * @return The document, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document.
+ */ + simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_result iterate_allow_incomplete_json(padded_string_view json) & noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const uint8_t *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(padded_string &&json) & noexcept = delete; + + /** + * @private + * + * Start iterating an on-demand JSON document. 
+ * + * ondemand::parser parser; + * json_iterator doc = parser.iterate_raw(json); + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call iterate() again or destroy the parser. + * + * The ondemand::document instance holds the iterator. The document must remain in scope + * while you are accessing instances of ondemand::value, ondemand::object, ondemand::array. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you are + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * + * @return The iterator, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document. + */ + simdjson_warn_unused simdjson_result iterate_raw(padded_string_view json) & noexcept; + + + /** + * Parse a buffer containing many JSON documents.
+ * + * auto json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"_padded; + * ondemand::parser parser; + * ondemand::document_stream docs = parser.iterate_many(json); + * for (auto & doc : docs) { + * std::cout << doc["foo"] << std::endl; + * } + * // Prints 1 2 3 + * + * No copy of the input buffer is made. + * + * The function is lazy: it may be that no more than one JSON document at a time is parsed. + * + * The caller is responsible for ensuring that the input string data remains unchanged and is + * not deleted during the loop. + * + * ### Format + * + * The buffer must contain a series of one or more JSON documents, concatenated into a single + * buffer, separated by ASCII whitespace. It effectively parses until it has a fully valid document, + * then starts parsing the next document at that point. (It does this with more parallelism and + * lookahead than you might think, though.) + * + * Documents that consist of an object or array may omit the whitespace between them, concatenating + * with no separator. Documents that consist of a single primitive (i.e. documents that are not + * arrays or objects) MUST be separated with ASCII whitespace. + * + * The characters inside a JSON document, and between JSON documents, must be valid Unicode (UTF-8). + * If there is a UTF-8 BOM, the parser skips it. + * + * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. + * Setting batch_size to excessively large or excessively small values may negatively impact + * performance. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you are + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings.
+ * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * + * ### Parser Capacity + * + * If the parser's current capacity is less than batch_size, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param buf The concatenated JSON to parse. + * @param len The length of the concatenated JSON. + * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet + * spot is cache-related: small enough to fit in cache, yet big enough to + * parse as many documents as possible in one tight loop. + * Defaults to DEFAULT_BATCH_SIZE (1MB), which has been a reasonable sweet spot in our tests. + * @param allow_comma_separated (defaults to false) This allows a mode where the documents are + * separated by commas instead of whitespace. It comes with a performance + * penalty because the entire document is indexed at once (and the document must be + * less than 4 GB), and there is no multithreading. In this mode, the batch_size parameter + * is effectively ignored, as it is set to at least the document size. + * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: + * - MEMALLOC if the parser does not have enough capacity and memory allocation fails + * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. + * - other json errors if parsing fails. You should not rely on these errors to always be the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
+ */ + inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + + /** @private We do not want to allow implicit conversion from C string to std::string. */ + simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; + + /** The capacity of this parser (the largest document it can process). */ + simdjson_inline size_t capacity() const noexcept; + /** The maximum capacity of this parser (the largest document it is allowed to process). */ + simdjson_inline size_t max_capacity() const noexcept; + simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; + /** + * The maximum depth of this parser (the most deeply nested objects and arrays it can process). + * This parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. 
+ */ + simdjson_inline size_t max_depth() const noexcept; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * The max_depth parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return The error, if there is one. + */ + simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept; + + #ifdef SIMDJSON_THREADS_ENABLED + /** + * The parser instance can use threads when they are available to speed up some + * operations. It is enabled by default. Changing this attribute will change the + * behavior of the parser for future operations. + */ + bool threaded{true}; + #endif + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result must be valid UTF-8. + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. 
+ * @param allow_replacement Whether we allow a replacement if the input string contains unmatched surrogate pairs. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement = false) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result may not be valid UTF-8. See https://simonsapin.github.io/wtf-8/ + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. 
+ */ + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; + +private: + /** @private [for benchmarking access] The implementation to use */ + std::unique_ptr implementation{}; + size_t _capacity{0}; + size_t _max_capacity; + size_t _max_depth{DEFAULT_MAX_DEPTH}; + std::unique_ptr string_buf{}; +#if SIMDJSON_DEVELOPMENT_CHECKS + std::unique_ptr start_positions{}; +#endif + + friend class json_iterator; + friend class document_stream; +}; + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(icelake::ondemand::parser &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H +/* end file simdjson/generic/ondemand/parser.h for icelake */ + +// All other declarations +/* including simdjson/generic/ondemand/array.h for icelake: #include "simdjson/generic/ondemand/array.h" */ +/* begin file simdjson/generic/ondemand/array.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +/** + * A forward-only JSON array. 
+ */ +class array { +public: + /** + * Create a new invalid array. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline array() noexcept = default; + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an array is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the beginning of the array and checks whether the + * array is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result is_empty() & noexcept; + /** + * Reset the iterator so that we are pointing back at the + * beginning of the array. You should still consume values only once even if you + * can iterate through the array more than once. If you unescape a string + * within the array more than once, you have unsafe code. 
Note that rewinding + * an array means that you may need to reparse it anew: it is not a free + * operation. + * + * @returns true if the array contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/0/foo/a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an array + * instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. 
We only support + * JSONPath queries that are trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Consumes the array and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + + /** + * Get the value at the given index. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; +protected: + /** + * Go to the end of the array, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + + /** + * Begin array iteration. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + */ + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + /** + * Begin array iteration from the root. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + * @error TAPE_ERROR if there is no closing ] at the end of the document.
+ */ + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + /** + * Begin array iteration. + * + * This version of the method should be called after the initial [ has been verified, and is + * intended for use by switch statements that check the type of a value. + * + * @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array. + */ + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + + /** + * Internal array creation: create an array at the position of the given iterator. Call array::start() or array::started() instead of this. + * + * @param iter The iterator. Must either be at the start of the first element with iter.is_alive() + * == true, or past the [] with is_alive() == false if the array is empty. Will be *moved* + * into the resulting array. + */ + simdjson_inline array(const value_iterator &iter) noexcept; + + /** + * Iterator marking current position. + * + * iter.is_alive() == false indicates iteration is complete.
+ */ + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; + friend struct simdjson_result; + friend class array_iterator; +}; + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(icelake::ondemand::array &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + inline simdjson_result count_elements() & noexcept; + inline simdjson_result is_empty() & noexcept; + inline simdjson_result reset() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_H +/* end file simdjson/generic/ondemand/array.h for icelake */ +/* including simdjson/generic/ondemand/array_iterator.h for icelake: #include "simdjson/generic/ondemand/array_iterator.h" */ +/* begin file simdjson/generic/ondemand/array_iterator.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" 
*/ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace icelake { +namespace ondemand { + +/** + * A forward-only JSON array. + * + * This is an input_iterator, meaning: + * - It is forward-only + * - * must be called exactly once per element. + * - ++ must be called exactly once in between each * (*, ++, *, ++, * ...) + */ +class array_iterator { +public: + /** Create a new, invalid array iterator. */ + simdjson_inline array_iterator() noexcept = default; + + // + // Iterator interface + // + + /** + * Get the current element. + * + * Part of the std::iterator interface. + */ + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + /** + * Check if we are at the end of the JSON. + * + * Part of the std::iterator interface. + * + * @return true if there are no more elements in the JSON array. + */ + simdjson_inline bool operator==(const array_iterator &) const noexcept; + /** + * Check if there are more elements in the JSON array. + * + * Part of the std::iterator interface. + * + * @return true if there are more elements in the JSON array. + */ + simdjson_inline bool operator!=(const array_iterator &) const noexcept; + /** + * Move to the next element. + * + * Part of the std::iterator interface. 
+ */ + simdjson_inline array_iterator &operator++() noexcept; + +private: + value_iterator iter{}; + + simdjson_inline array_iterator(const value_iterator &iter) noexcept; + + friend class array; + friend class value; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(icelake::ondemand::array_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H +/* end file simdjson/generic/ondemand/array_iterator.h for icelake */ +/* including simdjson/generic/ondemand/document.h for icelake: #include "simdjson/generic/ondemand/document.h" */ +/* begin file simdjson/generic/ondemand/document.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +/** + * A JSON document. It holds a json_iterator instance. 
+ * + * Used by tokens to get text, and string buffer location. + * + * You must keep the document around during iteration. + */ +class document { +public: + /** + * Create a new invalid document. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline document() noexcept = default; + simdjson_inline document(const document &other) noexcept = delete; // pass your documents by reference, not by copy + simdjson_inline document(document &&other) noexcept = default; + simdjson_inline document &operator=(const document &other) noexcept = delete; + simdjson_inline document &operator=(document &&other) noexcept = default; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() & noexcept; + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() & noexcept; + /** + * Cast this JSON value to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + /** + * Cast this JSON value (inside string) to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64() noexcept; + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. 
+ * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: Calling get_string() twice on the same document is an error. + * + * @param allow_replacement Whether to allow a replacement character for unmatched surrogate pairs. + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + /** + * Attempts to fill the provided std::string reference with the parsed value of the current string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. + * We recommend you avoid allocating an std::string unless you need to. + * + * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. + */ + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + /** + * Cast this JSON value to a string. + * + * The string is not guaranteed to be valid UTF-8. 
See https://simonsapin.github.io/wtf-8/ + * + * Important: Calling get_wobbly_string() twice on the same document is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_inline simdjson_result get_bool() noexcept; + /** + * Cast this JSON value to a value when the document is an object or an array. + * + * You must not have begun iterating through the object or array. When + * SIMDJSON_DEVELOPMENT_CHECKS is set to 1 (which is the case when building in Debug mode + * by default), and you have already begun iterating, + * you will get an OUT_OF_ORDER_ITERATION error. If you have begun iterating, you can use + * rewind() to reset the document to its initial state before calling this method. + * + * @returns A value if the document is a JSON array or object. + * @returns SCALAR_DOCUMENT_AS_VALUE error if the document is a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result get_value() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null. 
+ * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_inline simdjson_result get() & noexcept { + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + } + /** @overload template simdjson_result get() & noexcept */ + template simdjson_inline simdjson_result get() && noexcept { + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." 
+ " You may also add support for custom types, see our documentation."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value + * + * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_inline error_code get(T &out) & noexcept; + /** @overload template error_code get(T &out) & noexcept */ + template simdjson_inline error_code get(T &out) && noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.) + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() noexcept(false); + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() & noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() & noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns An unsigned 64-bit integer. 
+ * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); + /** + * Cast this JSON value to a value when the document is an object or an array. + * + * You must not have begun iterating through the object or array. When + * SIMDJSON_DEVELOPMENT_CHECKS is defined, and you have already begun iterating, + * you will get an OUT_OF_ORDER_ITERATION error. 
If you have begun iterating, you can use + * rewind() to reset the document to its initial state before calling this method. + * + * @returns A value if the document is a JSON array or object. + * @exception SCALAR_DOCUMENT_AS_VALUE error if the document is a scalar (see is_scalar() function). + */ + simdjson_inline operator value() noexcept(false); +#endif + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. 
+ * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) & noexcept; + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to + * a key a single time. Doing object["mykey"].to_string()and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
+ */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field was not there when they are not in order). + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. 
+ * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the document is a scalar (string, number, null, Boolean). + * Returns false when it is an array or object. + * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + + /** + * Checks whether the document is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; + + /** + * Checks whether the document is a negative number. 
+ * + * @returns true if the number is negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the document is an integer number. Note that + * this requires to partially parse the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * @returns true if the number is an integer. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808 and no larger than 18446744073709551615. + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808 and greater or equal to -9223372036854775808. + * get_number_type() is number_type::big_integer if we have an integer outside + * of those ranges (either larger than 18446744073709551615 or smaller than -9223372036854775808). + * Otherwise, get_number_type() has value number_type::floating_point_number + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number. 
An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. If this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view may be the padded buffer. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". 
+ * - -1.2e-100 + * - true + * - false + * - null + */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + /** + * Reset the iterator inside the document instance so we are pointing back at the + * beginning of the document, as if it had just been created. It invalidates all + * values, objects and arrays that you have created so far (including unescaped strings). + */ + inline void rewind() noexcept; + /** + * Returns debugging information. + */ + inline std::string to_debug_string() noexcept; + /** + * Some unrecoverable error conditions may render the document instance unusable. + * The is_alive() method returns true when the document is still suitable. + */ + inline bool is_alive() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() const noexcept; + + /** + * Returns true if this document has been fully parsed. + * If you have consumed the whole document and at_end() returns + * false, then there may be trailing content. + */ + inline bool at_end() const noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. 
+ * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/\\u00E9") == 123 + * doc.at_pointer((const char*)u8"/\u00E9") returns an error (NO_SUCH_FIELD) + * + * Note that at_pointer() automatically calls rewind between each call. Thus + * all values, objects and arrays that you have created so far (including unescaped strings) + * are invalidated. After calling at_pointer, you need to consume the result: string values + * should be stored in your own variables, arrays should be decoded and stored in your own array-like + * structures and so forth. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. 
We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_path(".\\u00E9") == 123 + * doc.at_path((const char*)u8".\u00E9") returns an error (NO_SUCH_FIELD) + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Consumes the document and returns a string_view instance corresponding to the + * document as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; +protected: + /** + * Consumes the document. 
+ */ + simdjson_inline error_code consume() noexcept; + + simdjson_inline document(ondemand::json_iterator &&iter) noexcept; + simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; + + simdjson_inline value_iterator resume_value_iterator() noexcept; + simdjson_inline value_iterator get_root_value_iterator() noexcept; + simdjson_inline simdjson_result start_or_resume_object() noexcept; + static simdjson_inline document start(ondemand::json_iterator &&iter) noexcept; + + // + // Fields + // + json_iterator iter{}; ///< Current position in the document + static constexpr depth_t DOCUMENT_DEPTH = 0; ///< document depth is always 0 + + friend class array_iterator; + friend class value; + friend class ondemand::parser; + friend class object; + friend class array; + friend class field; + friend class token; + friend class document_stream; + friend class document_reference; +}; + + +/** + * A document_reference is a thin wrapper around a document reference instance. + */ +class document_reference { +public: + simdjson_inline document_reference() noexcept; + simdjson_inline document_reference(document &d) noexcept; + simdjson_inline document_reference(const document_reference &other) noexcept = default; + simdjson_inline document_reference& operator=(const document_reference &other) noexcept = default; + simdjson_inline void rewind() noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code 
get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + + simdjson_inline simdjson_result is_null() noexcept; + template simdjson_inline simdjson_result get() & noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + simdjson_inline operator document&() const noexcept; +#if SIMDJSON_EXCEPTIONS + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator array() & noexcept(false); + simdjson_inline operator object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + + simdjson_inline simdjson_result type() noexcept; + simdjson_inline 
simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + +private: + document *doc{nullptr}; +}; +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(icelake::ondemand::document &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() 
noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() & noexcept; + template simdjson_inline simdjson_result get() && noexcept; + + template simdjson_inline error_code get(T &out) & noexcept; + template simdjson_inline error_code get(T &out) && noexcept; +#if SIMDJSON_EXCEPTIONS + template ::value == false>::type> + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator icelake::ondemand::array() & noexcept(false); + simdjson_inline operator icelake::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator icelake::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator icelake::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() 
noexcept; + simdjson_inline simdjson_result is_string() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool at_end() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + + +} // namespace simdjson + + + +namespace simdjson { + +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(icelake::ondemand::document_reference value, error_code error) noexcept; + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline 
simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; +#if SIMDJSON_EXCEPTIONS + template ::value == false>::type> + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator icelake::ondemand::array() & noexcept(false); + simdjson_inline operator icelake::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator icelake::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator icelake::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline 
simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H +/* end file simdjson/generic/ondemand/document.h for icelake */ +/* including simdjson/generic/ondemand/document_stream.h for icelake: #include "simdjson/generic/ondemand/document_stream.h" */ +/* begin file simdjson/generic/ondemand/document_stream.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#ifdef SIMDJSON_THREADS_ENABLED +#include +#include +#include +#endif + +namespace simdjson { +namespace icelake { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED +/** @private Custom worker class **/ +struct stage1_worker { + stage1_worker() noexcept = default; + stage1_worker(const stage1_worker&) = delete; + stage1_worker(stage1_worker&&) = delete; + stage1_worker operator=(const stage1_worker&) = delete; + ~stage1_worker(); + /** + * We only start 
the thread when it is needed, not at object construction, this may throw. + * You should only call this once. + **/ + void start_thread(); + /** + * Start a stage 1 job. You should first call 'run', then 'finish'. + * You must call start_thread once before. + */ + void run(document_stream * ds, parser * stage1, size_t next_batch_start); + /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ + void finish(); + +private: + + /** + * Normally, we would never stop the thread. But we do in the destructor. + * This function is only safe assuming that you are not waiting for results. You + * should have called run, then finish, and be done. + **/ + void stop_thread(); + + std::thread thread{}; + /** These three variables define the work done by the thread. **/ + ondemand::parser * stage1_thread_parser{}; + size_t _next_batch_start{}; + document_stream * owner{}; + /** + * We have two state variables. This could be streamlined to one variable in the future but + * we use two for clarity. + */ + bool has_work{false}; + bool can_work{true}; + + /** + * We lock using a mutex. + */ + std::mutex locking_mutex{}; + std::condition_variable cond_var{}; + + friend class document_stream; +}; +#endif // SIMDJSON_THREADS_ENABLED + +/** + * A forward-only stream of documents. + * + * Produced by parser::iterate_many. + * + */ +class document_stream { +public: + /** + * Construct an uninitialized document_stream. + * + * ```c++ + * document_stream docs; + * auto error = parser.iterate_many(json).get(docs); + * ``` + */ + simdjson_inline document_stream() noexcept; + /** Move one document_stream to another. */ + simdjson_inline document_stream(document_stream &&other) noexcept = default; + /** Move one document_stream to another. */ + simdjson_inline document_stream &operator=(document_stream &&other) noexcept = default; + + simdjson_inline ~document_stream() noexcept; + + /** + * Returns the input size in bytes. 
+ */ + inline size_t size_in_bytes() const noexcept; + + /** + * After iterating through the stream, this method + * returns the number of bytes that were not parsed at the end + * of the stream. If truncated_bytes() differs from zero, + * then the input was truncated maybe because incomplete JSON + * documents were found at the end of the stream. You + * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). + * + * You should only call truncated_bytes() after streaming through all + * documents, like so: + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto & doc : stream) { + * // do something with doc + * } + * size_t truncated = stream.truncated_bytes(); + * + */ + inline size_t truncated_bytes() const noexcept; + + class iterator { + public: + using value_type = simdjson_result; + using reference = simdjson_result; + using pointer = void; + using difference_type = std::ptrdiff_t; + using iterator_category = std::input_iterator_tag; + + /** + * Default constructor. + */ + simdjson_inline iterator() noexcept; + /** + * Get the current document (or error). + */ + simdjson_inline reference operator*() noexcept; + /** + * Advance to the next document (prefix). + */ + inline iterator& operator++() noexcept; + /** + * Check if we're at the end yet. + * @param other the end iterator to compare to. + */ + simdjson_inline bool operator!=(const iterator &other) const noexcept; + /** + * @private + * + * Gives the current index in the input document in bytes. + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * auto doc = *i; + * size_t index = i.current_index(); + * } + * + * This function (current_index()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier.
+ */ + simdjson_inline size_t current_index() const noexcept; + + /** + * @private + * + * Gives a view of the current document at the current position. + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * std::string_view v = i.source(); + * } + * + * The returned string_view instance is simply a map to the (unparsed) + * source string: it may thus include white-space characters and all manner + * of padding. + * + * This function (source()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + * + */ + simdjson_inline std::string_view source() const noexcept; + + /** + * Returns error of the stream (if any). + */ + inline error_code error() const noexcept; + + private: + simdjson_inline iterator(document_stream *s, bool finished) noexcept; + /** The document_stream we're iterating through. */ + document_stream* stream; + /** Whether we're finished or not. */ + bool finished; + + friend class document; + friend class document_stream; + friend class json_iterator; + }; + + /** + * Start iterating the documents in the stream. + */ + simdjson_inline iterator begin() noexcept; + /** + * The end of the stream, for iterator comparison purposes. + */ + simdjson_inline iterator end() noexcept; + +private: + + document_stream &operator=(const document_stream &) = delete; // Disallow copying + document_stream(const document_stream &other) = delete; // Disallow copying + + /** + * Construct a document_stream. Does not allocate or parse anything until the iterator is + * used. 
+ * + * @param parser is a reference to the parser instance used to generate this document_stream + * @param buf is the raw byte buffer we need to process + * @param len is the length of the raw byte buffer in bytes + * @param batch_size is the size of the windows (must be strictly greater or equal to the largest JSON document) + */ + simdjson_inline document_stream( + ondemand::parser &parser, + const uint8_t *buf, + size_t len, + size_t batch_size, + bool allow_comma_separated + ) noexcept; + + /** + * Parse the first document in the buffer. Used by begin(), to handle allocation and + * initialization. + */ + inline void start() noexcept; + + /** + * Parse the next document found in the buffer previously given to document_stream. + * + * The content should be a valid JSON document encoded as UTF-8. If there is a + * UTF-8 BOM, the parser skips it. + * + * You do NOT need to pre-allocate a parser. This function takes care of + * pre-allocating a capacity defined by the batch_size defined when creating the + * document_stream object. + * + * The function returns simdjson::EMPTY if there is no more data to be parsed. + * + * The function returns simdjson::SUCCESS (as integer = 0) in case of success + * and indicates that the buffer has successfully been parsed to the end. + * Every document it contained has been parsed without error. + * + * The function returns an error code from simdjson/simdjson.h in case of failure + * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth; + * the simdjson::error_message function converts these error codes into a string). + * + * NOTE(review): next() is declared void, so the codes described above are not + * literal return values; presumably they are recorded in the stream's `error` + * member -- confirm against the implementation. + * + * You can also check validity by calling parser.is_valid(). The same parser can + * and should be reused for the other documents in the buffer. + */ + inline void next() noexcept; + + /** Move the json_iterator of the document to the location of the next document in the stream. */ + inline void next_document() noexcept; + + /** Get the next document index.
*/ + inline size_t next_batch_start() const noexcept; + + /** Pass the next batch through stage 1 with the given parser. */ + inline error_code run_stage1(ondemand::parser &p, size_t batch_start) noexcept; + + // Fields + ondemand::parser *parser; + const uint8_t *buf; + size_t len; + size_t batch_size; + bool allow_comma_separated; + /** + * We are going to use just one document instance. The document owns + * the json_iterator. It implies that we only ever pass a reference + * to the document to the users. + */ + document doc{}; + /** The error (or lack thereof) from the current document. */ + error_code error; + size_t batch_start{0}; + size_t doc_index{}; + + #ifdef SIMDJSON_THREADS_ENABLED + /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ + bool use_thread; + + inline void load_from_stage1_thread() noexcept; + + /** Start a thread to run stage 1 on the next batch. */ + inline void start_stage1_thread() noexcept; + + /** Wait for the stage 1 thread to finish and capture the results. */ + inline void finish_stage1_thread() noexcept; + + /** The error returned from the stage 1 thread. */ + error_code stage1_thread_error{UNINITIALIZED}; + /** The thread used to run stage 1 against the next batch in the background. */ + std::unique_ptr worker{new(std::nothrow) stage1_worker()}; + /** + * The parser used to run stage 1 in the background. Will be swapped + * with the regular parser when finished. 
+ */ + ondemand::parser stage1_thread_parser{}; + + friend struct stage1_worker; + #endif // SIMDJSON_THREADS_ENABLED + + friend class parser; + friend class document; + friend class json_iterator; + friend struct simdjson_result; + friend struct internal::simdjson_result_base; +}; // document_stream + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(icelake::ondemand::document_stream &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H +/* end file simdjson/generic/ondemand/document_stream.h for icelake */ +/* including simdjson/generic/ondemand/field.h for icelake: #include "simdjson/generic/ondemand/field.h" */ +/* begin file simdjson/generic/ondemand/field.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +/** + * A JSON field (key/value pair) in an object. + * + * Returned from object iteration. + * + * Extends from std::pair so you can use C++ algorithms that rely on pairs. 
+ */ +class field : public std::pair { +public: + /** + * Create a new invalid field. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline field() noexcept; + + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + /** + * Get the key as a raw_json_string. Can be used for direct comparison with + * an unescaped C string: e.g., key() == "test". + */ + simdjson_inline raw_json_string key() const noexcept; + /** + * Get the unprocessed key as a string_view. This includes the quotes and may include + * some spaces after the last quote. + */ + simdjson_inline std::string_view key_raw_json_token() const noexcept; + /** + * Get the key as a string_view. This does not include the quotes and + * the string is unprocessed key so it may contain escape characters + * (e.g., \uXXXX or \n). Use unescaped_key() to get the unescaped key. + */ + simdjson_inline std::string_view escaped_key() const noexcept; + /** + * Get the field value. 
+ */ + simdjson_inline ondemand::value &value() & noexcept; + /** + * @overload ondemand::value &ondemand::value() & noexcept + */ + simdjson_inline ondemand::value value() && noexcept; + +protected: + simdjson_inline field(raw_json_string key, ondemand::value &&value) noexcept; + static simdjson_inline simdjson_result start(value_iterator &parent_iter) noexcept; + static simdjson_inline simdjson_result start(const value_iterator &parent_iter, raw_json_string key) noexcept; + friend struct simdjson_result; + friend class object_iterator; +}; + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(icelake::ondemand::field &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result key() noexcept; + simdjson_inline simdjson_result key_raw_json_token() noexcept; + simdjson_inline simdjson_result escaped_key() noexcept; + simdjson_inline simdjson_result value() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_H +/* end file simdjson/generic/ondemand/field.h for icelake */ +/* including simdjson/generic/ondemand/object.h for icelake: #include "simdjson/generic/ondemand/object.h" */ +/* begin file simdjson/generic/ondemand/object.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +/** + * A forward-only JSON object field iterator. + */ +class object { +public: + /** + * Create a new invalid object. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline object() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. 
+ * + * You are expected to access keys only once. You should access the value corresponding to a + * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. + * + * If you expect to have keys with escape characters, please review our documentation. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field was not there when they are not in order). + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. 
The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. + * + * If you expect to have keys with escape characters, please review our documentation. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. 
+ * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an object + * instance: there is no rewind and no invalidation. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that are trivially convertible to JSON Pointer queries: key + * names and array indices.
+ * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Reset the iterator so that we are pointing back at the + * beginning of the object. You should still consume values only once even if you + * can iterate through the object more than once. If you unescape a string within + * the object more than once, you have unsafe code. Note that rewinding an object + * means that you may need to reparse it anew: it is not a free operation. + * + * @returns true if the object contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * This method scans the beginning of the object and checks whether the + * object is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + inline simdjson_result is_empty() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. 
+ * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Consumes the object and returns a string_view instance corresponding to the + * object as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + +protected: + /** + * Go to the end of the object, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + static simdjson_inline object resume(const value_iterator &iter) noexcept; + simdjson_inline object(const value_iterator &iter) noexcept; + + simdjson_warn_unused simdjson_inline error_code find_field_raw(const std::string_view key) noexcept; + + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(icelake::ondemand::object &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + 
simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + + inline simdjson_result reset() noexcept; + inline simdjson_result is_empty() noexcept; + inline simdjson_result count_fields() & noexcept; + inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_H +/* end file simdjson/generic/ondemand/object.h for icelake */ +/* including simdjson/generic/ondemand/object_iterator.h for icelake: #include "simdjson/generic/ondemand/object_iterator.h" */ +/* begin file simdjson/generic/ondemand/object_iterator.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +class object_iterator { +public: + /** + * Create a new invalid object_iterator. + * + * Exists so you can declare a variable and later assign to it before use. 
+ */ + simdjson_inline object_iterator() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result operator*() noexcept; + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const object_iterator &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. + simdjson_inline bool operator!=(const object_iterator &) const noexcept; + // Checks for '}' and ',' + simdjson_inline object_iterator &operator++() noexcept; + +private: + /** + * The underlying JSON iterator. + * + * PERF NOTE: expected to be elided in favor of the parent document: this is set when the object + * is first used, and never changes afterwards. + */ + value_iterator iter{}; + + simdjson_inline object_iterator(const value_iterator &iter) noexcept; + friend struct simdjson_result; + friend class object; +}; + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(icelake::ondemand::object_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth.
+ simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + // Checks for '}' and ',' + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H +/* end file simdjson/generic/ondemand/object_iterator.h for icelake */ +/* including simdjson/generic/ondemand/serialization.h for icelake: #include "simdjson/generic/ondemand/serialization.h" */ +/* begin file simdjson/generic/ondemand/serialization.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Create a string-view instance out of a document instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(icelake::ondemand::document& x) noexcept; +/** + * Create a string-view instance out of a value instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. The value must + * not have been accessed previously. It does not + * validate the content. + */ +inline simdjson_result to_json_string(icelake::ondemand::value& x) noexcept; +/** + * Create a string-view instance out of an object instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(icelake::ondemand::object& x) noexcept; +/** + * Create a string-view instance out of an array instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again.
It does not + * validate the content. + */ +inline simdjson_result to_json_string(icelake::ondemand::array& x) noexcept; +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +} // namespace simdjson + +/** + * We want to support argument-dependent lookup (ADL). + * Hence we should define operator<< in the namespace + * where the argument (here value, object, etc.) resides. + * Credit: @madhur4127 + * See https://github.com/simdjson/simdjson/issues/1768 + */ +namespace simdjson { namespace icelake { namespace ondemand { + +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param x The element. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::value x); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::array value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The document. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::document& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::document_reference& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The object. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::object value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +}}} // namespace simdjson::icelake::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H +/* end file simdjson/generic/ondemand/serialization.h for icelake */ + +// Inline definitions +/* including simdjson/generic/ondemand/array-inl.h for icelake: #include "simdjson/generic/ondemand/array-inl.h" */ +/* begin file simdjson/generic/ondemand/array-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped 
(editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +// +// ## Live States +// +// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the array is first found and the iterator is just past the `[`. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the `,` before the next value (or `]`). In this state, +// depth == iter->depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter->depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the array iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an +// array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter->depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter->depth == depth, and at_start == false. +// +// ## Terminal State +// +// The terminal state has iter->depth < depth.
at_start is always false. +// +// - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this +// by decrementing depth. In this state, iter->depth < depth, at_start == false, and +// error == SUCCESS. +// + +simdjson_inline array::array(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { + // We don't need to know if the array is empty to start iteration, but we do want to know if there + // is an error--thus `simdjson_unused`. + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_root_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { + bool has_value; + SIMDJSON_TRY(iter.started_array().get(has_value)); + return array(iter); +} + +simdjson_inline simdjson_result array::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return array_iterator(iter); +} +simdjson_inline simdjson_result array::end() noexcept { + return array_iterator(iter); +} +simdjson_inline error_code array::consume() noexcept { + auto error = iter.json_iter().skip_child(iter.depth()-1); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result array::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. 
+ const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline simdjson_result array::count_elements() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the array after counting the number of elements. + iter.reset_array(); + return count; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline simdjson_result array::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_array().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +inline simdjson_result array::reset() & noexcept { + return iter.reset_array(); +} + +inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + // - means "the append position" or "the element after the end of the array" + // We don't support this, because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. 
If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + // Get the child + auto child = at(array_index); + // If there is an error, it ends here + if(child.error()) { + return child; + } + + // If there is a /, we're not done yet, call recursively. + if (i < json_pointer.length()) { + child = child.at_pointer(json_pointer.substr(i)); + } + return child; +} + +inline std::string json_path_to_pointer_conversion(std::string_view json_path) { + if (json_path.empty() || (json_path.front() != '.' && + json_path.front() != '[')) { + return "-1"; // This is just a sentinel value, the caller should check for this and return an error. + } + + std::string result; + // Reserve space to reduce allocations, adjusting for potential increases due + // to escaping. + result.reserve(json_path.size() * 2); + + size_t i = 0; + + while (i < json_path.length()) { + if (json_path[i] == '.') { + result += '/'; + } else if (json_path[i] == '[') { + result += '/'; + ++i; // Move past the '[' + while (i < json_path.length() && json_path[i] != ']') { + if (json_path[i] == '~') { + result += "~0"; + } else if (json_path[i] == '/') { + result += "~1"; + } else { + result += json_path[i]; + } + ++i; + } + if (i == json_path.length() || json_path[i] != ']') { + return "-1"; // Using sentinel value that will be handled as an error by the caller. 
+ } + } else { + if (json_path[i] == '~') { + result += "~0"; + } else if (json_path[i] == '/') { + result += "~1"; + } else { + result += json_path[i]; + } + } + ++i; + } + + return result; +} + +inline simdjson_result array::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { return INVALID_JSON_POINTER; } + return at_pointer(json_pointer); +} + +simdjson_inline simdjson_result array::at(size_t index) noexcept { + size_t i = 0; + for (auto value : *this) { + if (i == index) { return value; } + i++; + } + return INDEX_OUT_OF_BOUNDS; +} + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + icelake::ondemand::array &&value +) noexcept + : implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept + : implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { + if (error()) { return error(); } + return first.is_empty(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view 
json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H +/* end file simdjson/generic/ondemand/array-inl.h for icelake */ +/* including simdjson/generic/ondemand/array_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/array_iterator-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result array_iterator::operator*() noexcept { + if (iter.error()) { iter.abandon(); return iter.error(); } + return value(iter.child()); +} +simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { + return iter.is_open(); +} +simdjson_inline array_iterator &array_iterator::operator++() noexcept { + error_code error; + // PERF NOTE this is a safety rail 
... users should exit loops as soon as they receive an error, so we'll never get here. + // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. + if (( error = iter.error() )) { return *this; } + if (( error = iter.skip_child() )) { return *this; } + if (( error = iter.has_next_element().error() )) { return *this; } + return *this; +} + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + icelake::ondemand::array_iterator &&value +) noexcept + : icelake::implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : icelake::implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++(first); + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/array_iterator-inl.h for icelake */ +/* including simdjson/generic/ondemand/document-inl.h for icelake: #include "simdjson/generic/ondemand/document-inl.h" */ +/* begin file simdjson/generic/ondemand/document-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H + +/* amalgamation skipped 
(editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept + : iter{std::forward(_iter)} +{ + logger::log_start_value(iter, "document"); +} + +simdjson_inline document document::start(json_iterator &&iter) noexcept { + return document(std::forward(iter)); +} + +inline void document::rewind() noexcept { + iter.rewind(); +} + +inline std::string document::to_debug_string() noexcept { + return iter.to_string(); +} + +inline simdjson_result document::current_location() const noexcept { + return 
iter.current_location(); +} + +inline int32_t document::current_depth() const noexcept { + return iter.depth(); +} + +inline bool document::at_end() const noexcept { + return iter.at_end(); +} + + +inline bool document::is_alive() noexcept { + return iter.is_alive(); +} +simdjson_inline value_iterator document::resume_value_iterator() noexcept { + return value_iterator(&iter, 1, iter.root_position()); +} +simdjson_inline value_iterator document::get_root_value_iterator() noexcept { + return resume_value_iterator(); +} +simdjson_inline simdjson_result document::start_or_resume_object() noexcept { + if (iter.at_root()) { + return get_object(); + } else { + return object::resume(resume_value_iterator()); + } +} +simdjson_inline simdjson_result document::get_value() noexcept { + // Make sure we start any arrays or objects before returning, so that start_root_() + // gets called. + + // It is the convention throughout the code that the macro `SIMDJSON_DEVELOPMENT_CHECKS` determines whether + // we check for OUT_OF_ORDER_ITERATION. Proper on::demand code should never trigger this error. +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.at_root()) { return OUT_OF_ORDER_ITERATION; } +#endif + // assert_at_root() serves two purposes: in Debug mode, whether or not + // SIMDJSON_DEVELOPMENT_CHECKS is set or not, it checks that we are at the root of + // the document (this will typically be redundant). In release mode, it generates + // SIMDJSON_ASSUME statements to allow the compiler to make assumptions. + iter.assert_at_root(); + switch (*iter.peek()) { + case '[': { + // The following lines check that the document ends with ]. + auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_array(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + case '{': { + // The following lines would check that the document ends with }. 
+ auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_object(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + default: + // Unfortunately, scalar documents are a special case in simdjson and they cannot + // be safely converted to value instances. + return SCALAR_DOCUMENT_AS_VALUE; + } +} +simdjson_inline simdjson_result document::get_array() & noexcept { + auto value = get_root_value_iterator(); + return array::start_root(value); +} +simdjson_inline simdjson_result document::get_object() & noexcept { + auto value = get_root_value_iterator(); + return object::start_root(value); +} + +/** + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. We want to disallow trailing + * content. + * Thus, in several implementations below, we pass a 'true' parameter value to + * a get_root_value_iterator() method: this indicates that we disallow trailing content. 
+ */ + +simdjson_inline simdjson_result document::get_uint64() noexcept { + return get_root_value_iterator().get_root_uint64(true); +} +simdjson_inline simdjson_result document::get_uint64_in_string() noexcept { + return get_root_value_iterator().get_root_uint64_in_string(true); +} +simdjson_inline simdjson_result document::get_int64() noexcept { + return get_root_value_iterator().get_root_int64(true); +} +simdjson_inline simdjson_result document::get_int64_in_string() noexcept { + return get_root_value_iterator().get_root_int64_in_string(true); +} +simdjson_inline simdjson_result document::get_double() noexcept { + return get_root_value_iterator().get_root_double(true); +} +simdjson_inline simdjson_result document::get_double_in_string() noexcept { + return get_root_value_iterator().get_root_double_in_string(true); +} +simdjson_inline simdjson_result document::get_string(bool allow_replacement) noexcept { + return get_root_value_iterator().get_root_string(true, allow_replacement); +} +template +simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { + return get_root_value_iterator().get_root_string(receiver, true, allow_replacement); +} +simdjson_inline simdjson_result document::get_wobbly_string() noexcept { + return get_root_value_iterator().get_root_wobbly_string(true); +} +simdjson_inline simdjson_result document::get_raw_json_string() noexcept { + return get_root_value_iterator().get_root_raw_json_string(true); +} +simdjson_inline simdjson_result document::get_bool() noexcept { + return get_root_value_iterator().get_root_bool(true); +} +simdjson_inline simdjson_result document::is_null() noexcept { + return get_root_value_iterator().is_root_null(true); +} + +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document::get() & 
noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } + +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } + +template simdjson_inline error_code document::get(T &out) & noexcept { + return get().get(out); +} +template simdjson_inline error_code document::get(T &out) && noexcept { + return std::forward(*this).get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline document::operator T() noexcept(false) { return get(); } +simdjson_inline document::operator array() & noexcept(false) { return get_array(); } +simdjson_inline document::operator object() & noexcept(false) { return get_object(); } +simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } 
+simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document::operator double() noexcept(false) { return get_double(); } +simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document::operator value() noexcept(false) { return get_value(); } + +#endif +simdjson_inline simdjson_result document::count_elements() & noexcept { + auto a = get_array(); + simdjson_result answer = a.count_elements(); + /* If there was an array, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::count_fields() & noexcept { + auto a = get_object(); + simdjson_result answer = a.count_fields(); + /* If there was an object, we are now left pointing at its first element. 
*/ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::at(size_t index) & noexcept { + auto a = get_array(); + return a.at(index); +} +simdjson_inline simdjson_result document::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result document::end() & noexcept { + return {}; +} + +simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field(const char *key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline error_code document::consume() noexcept { + auto error = iter.skip_child(0); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result document::raw_json() noexcept { + auto _iter = get_root_value_iterator(); + const uint8_t * starting_point{_iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. 
+ const uint8_t * final_point{iter.unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result document::type() noexcept { + return get_root_value_iterator().type(); +} + +simdjson_inline simdjson_result document::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline simdjson_result document::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + +simdjson_inline bool document::is_negative() noexcept { + return get_root_value_iterator().is_root_negative(); +} + +simdjson_inline simdjson_result document::is_integer() noexcept { + return get_root_value_iterator().is_root_integer(true); +} + +simdjson_inline simdjson_result document::get_number_type() noexcept { + return get_root_value_iterator().get_root_number_type(true); +} + +simdjson_inline simdjson_result document::get_number() noexcept { + return get_root_value_iterator().get_root_number(true); +} + + +simdjson_inline simdjson_result document::raw_json_token() noexcept { + auto _iter = get_root_value_iterator(); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); +} + +simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_pointer.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result 
document::at_path(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_path.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + icelake::ondemand::document &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base( + error + ) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return 
first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return 
first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first); +} +template<> simdjson_inline error_code simdjson_result::get(icelake::ondemand::document &out) & noexcept = delete; +template<> simdjson_inline error_code 
simdjson_result::get(icelake::ondemand::document &out) && noexcept { + if (error()) { return error(); } + out = std::forward(first); + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} + +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} + +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} + +simdjson_inline bool simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} + +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} + +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} + +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} + + +#if SIMDJSON_EXCEPTIONS +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator icelake::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator icelake::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() 
noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator icelake::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator icelake::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline bool simdjson_result::at_end() const noexcept { + if (error()) { return error(); } + return first.at_end(); +} + + +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} + +} // namespace simdjson + + +namespace simdjson { +namespace icelake { +namespace ondemand { + +simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} +simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} +simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); 
} +simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } +simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } +/** + * The document_reference instances are used primarily/solely for streams of JSON + * documents. + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. + * + * However, for streams of JSON documents, we want to be able to start from + * "321" "321" "321" + * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() + * successfully each time. + * + * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: + * this indicates that we allow trailing content. + */ +simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } +simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } +simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } +template +simdjson_inline error_code document_reference::get_string(string_type& receiver, bool 
allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } +simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } +simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } +simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } +simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } +simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_value(); } +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline document_reference::operator T() noexcept(false) { return get(); } +simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } 
+simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } +simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } +simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } +#endif +simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } +simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } +simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } +simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } +simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } +simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline 
simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } +simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } +simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } +simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } +simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } +simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } +simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } +simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } +simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } +simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } +simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} +simdjson_inline document_reference::operator document&() const noexcept { return *doc; } + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + + + +namespace simdjson { +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::document_reference value, error_code error) + noexcept : 
implementation_simdjson_result_base(std::forward(value), error) {} + + +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return 
first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if 
(error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator icelake::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator icelake::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline 
simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator icelake::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator icelake::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H +/* end file simdjson/generic/ondemand/document-inl.h for icelake */ +/* including simdjson/generic/ondemand/document_stream-inl.h for icelake: #include "simdjson/generic/ondemand/document_stream-inl.h" */ +/* begin file simdjson/generic/ondemand/document_stream-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define 
SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace icelake { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. + std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); +} + +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); +} + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). 
+ if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); +} + + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } +} + +inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); +} + +#endif // SIMDJSON_THREADS_ENABLED + +simdjson_inline document_stream::document_stream( + ondemand::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size, + bool _allow_comma_separated +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? 
MINIMAL_BATCH_SIZE : _batch_size}, + allow_comma_separated{_allow_comma_separated}, + error{SUCCESS} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change + #endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif +} + +simdjson_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + allow_comma_separated{false}, + error{UNINITIALIZED} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) + #endif +{ +} + +simdjson_inline document_stream::~document_stream() noexcept +{ + #ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); + #endif +} + +inline size_t document_stream::size_in_bytes() const noexcept { + return len; +} + +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +} + +simdjson_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { +} + +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { +} + +simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { + //if(stream->error) { return stream->error; } + return simdjson_result(stream->doc, stream->error); +} + +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) 
+ // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. + // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->next() is guarded against error conditions + // (it will immediately return if stream->error casts to true). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; +} + +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; +} + +simdjson_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished.
+ return iterator(this, error == EMPTY); +} + +simdjson_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); +} + +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->allocate(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } + doc_index = batch_start; + doc = document(json_iterator(&buf[batch_start], parser)); + doc.iter._streaming = true; + + #ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread on next batch if needed + error = stage1_thread_parser.allocate(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } + #endif // SIMDJSON_THREADS_ENABLED +} + +inline void document_stream::next() noexcept { + // We always exit at once when in an error condition. + if (error) { return; } + next_document(); + if (error) { return; } + auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); + doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; + + // Check if at end of structural indexes (i.e.
at end of batch) + if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { + error = EMPTY; + // Load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + #ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } + #else + error = run_stage1(*parser, batch_start); + #endif + /** + * Whenever we move to another window, we need to update all pointers to make + * it appear as if the input buffer started at the beginning of the window. + * + * Take this input: + * + * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] + * + * Say you process the following window... + * + * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' + * + * When you do so, the json_iterator has a pointer at the beginning of the memory region + * (pointing at the beginning of '{"z"...'). + * + * When you move to the window that starts at... + * + * '[7, 10, 9] [15, 11, 12, 13] ...' + * + * then it is not sufficient to just run stage 1. You also need to re-anchor the + * json_iterator so that it believes we are starting at '[7, 10, 9]...'. + * + * Under the DOM front-end, this gets done automatically because the parser owns + * the pointer to the data, and when you call stage1 and then stage2 on the same + * parser, then stage2 will run on the pointer acquired by stage1. + * + * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that + * we used. But json_iterator has no callback when stage1 is called on the parser. + * In fact, I think that the parser is unaware of json_iterator. + * + * + * So we need to re-anchor the json_iterator after each call to stage 1 so that + * all of the pointers are in sync. + */ + doc.iter = json_iterator(&buf[batch_start], parser); + doc.iter._streaming = true; + /** + * End of resync.
+ */ + + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. + doc_index = batch_start; + } + } +} + +inline void document_stream::next_document() noexcept { + // Go to next place where depth=0 (document depth) + error = doc.iter.skip_child(0); + if (error) { return; } + // Always set depth=1 at the start of document + doc.iter._depth = 1; + // consume comma if comma separated is allowed + if (allow_comma_separated) { doc.iter.consume_character(','); } + // Resets the string buffer at the beginning, thus invalidating the strings. + doc.iter._string_buf_loc = parser->string_buf.get(); + doc.iter._root = doc.iter.position(); +} + +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; +} + +inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { + // This code only updates the structural index in the parser, it does not update any json_iterator + // instance. 
+ size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); + } +} + +simdjson_inline size_t document_stream::iterator::current_index() const noexcept { + return stream->doc_index; +} + +simdjson_inline std::string_view document_stream::iterator::source() const noexcept { + auto depth = stream->doc.iter.depth(); + auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); + + // If at root, process the first token to determine if scalar value + if (stream->doc.iter.at_root()) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': // Depth=1 already at start of document + break; + case '}': case ']': + depth--; + break; + default: // Scalar value document + // TODO: We could remove trailing whitespaces + // This returns a string spanning from start of value to the beginning of the next document (excluded) + { + auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; + // normally the length would be next_index - current_index() - 1, except for the last document + size_t svlen = next_index - current_index(); + const char *start = reinterpret_cast(stream->buf) + current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { + svlen--; + } + return std::string_view(start, svlen); + } + } + cur_struct_index++; + } + + while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': + depth++; + break; + case '}': case ']': + depth--; + break; + } + if (depth == 0) { break; } + 
cur_struct_index++; + } + + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; +} + +inline error_code document_stream::iterator::error() const noexcept { + return stream->error; +} + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(stage1_thread_parser,*parser); + error = stage1_thread_error; + if (error) { return; } + + // If there's anything left, start the stage 1 thread! + if (next_batch_start() < len) { + start_stage1_thread(); + } +} + +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. 
+ this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); + + worker->run(this, & this->stage1_thread_parser, _next_batch_start); +} + +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result::simdjson_result( + icelake::ondemand::document_stream &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} + +} + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H +/* end file simdjson/generic/ondemand/document_stream-inl.h for icelake */ +/* including simdjson/generic/ondemand/field-inl.h for icelake: #include "simdjson/generic/ondemand/field-inl.h" */ +/* begin file simdjson/generic/ondemand/field-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +// clang 6 does not think the default constructor can be noexcept, so we make it explicit +simdjson_inline field::field() noexcept : std::pair() {} + +simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept + : 
std::pair(key, std::forward(value)) +{ +} + +simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { + raw_json_string key; + SIMDJSON_TRY( parent_iter.field_key().get(key) ); + SIMDJSON_TRY( parent_iter.field_value() ); + return field::start(parent_iter, key); +} + +simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { + return field(key, parent_iter.child()); +} + +simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); + first.consume(); + return answer; +} + +simdjson_inline raw_json_string field::key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + return first; +} + + +simdjson_inline std::string_view field::key_raw_json_token() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + return std::string_view(reinterpret_cast(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1); +} + +simdjson_inline std::string_view field::escaped_key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us.
+ auto end_quote = second.iter._json_iter->token.peek(-1); + while(*end_quote != '"') end_quote--; + return std::string_view(reinterpret_cast(first.buf), end_quote - first.buf); +} + +simdjson_inline value &field::value() & noexcept { + return second; +} + +simdjson_inline value field::value() && noexcept { + return std::forward(*this).second; +} + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + icelake::ondemand::field &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::key() noexcept { + if (error()) { return error(); } + return first.key(); +} + +simdjson_inline simdjson_result simdjson_result::key_raw_json_token() noexcept { + if (error()) { return error(); } + return first.key_raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::escaped_key() noexcept { + if (error()) { return error(); } + return first.escaped_key(); +} + +simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(allow_replacement); +} + +simdjson_inline simdjson_result simdjson_result::value() noexcept { + if (error()) { return error(); } + return std::move(first.value()); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +/* end file simdjson/generic/ondemand/field-inl.h for icelake */ +/* including simdjson/generic/ondemand/json_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/json_iterator-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef 
SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept + : token(std::forward(other.token)), + parser{other.parser}, + _string_buf_loc{other._string_buf_loc}, + error{other.error}, + _depth{other._depth}, + _root{other._root}, + _streaming{other._streaming} +{ + other.parser = nullptr; +} +simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { + token = other.token; + parser = other.parser; + _string_buf_loc = other._string_buf_loc; + error = other.error; + _depth = other._depth; + _root = other._root; + _streaming = other._streaming; + other.parser = nullptr; + return *this; +} + +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + 
_streaming{false} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} + +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{streaming} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + +inline void json_iterator::rewind() noexcept { + token.set_position( root_position() ); + logger::log_headers(); // We start again + _string_buf_loc = parser->string_buf.get(); + _depth = 1; +} + +inline bool json_iterator::balanced() const noexcept { + token_iterator ti(token); + int32_t count{0}; + ti.set_position( root_position() ); + while(ti.peek() <= peek_last()) { + switch (*ti.return_current_and_advance()) + { + case '[': case '{': + count++; + break; + case ']': case '}': + count--; + break; + default: + break; + } + } + return count == 0; +} + + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and parent_depth, which is a desired effect. The warning does not show up if the +// skip_child() function is not marked inline). 
+SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { + if (depth() <= parent_depth) { return SUCCESS; } + switch (*return_current_and_advance()) { + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + + // For the first open array/object in a value, we've already incremented depth, so keep it the same + // We never stop at colon, but if we did, it wouldn't affect depth + case '[': case '{': case ':': + logger::log_start_value(*this, "skip"); + break; + // If there is a comma, we have just finished a value in an array/object, and need to get back in + case ',': + logger::log_value(*this, "skip"); + break; + // ] or } means we just finished a value and need to jump out of the array/object + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } +#if SIMDJSON_CHECK_EOF + // If there are no more tokens, the parent is incomplete. + if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + break; + case '"': + if(*peek() == ':') { + // We are at a key!!! + // This might happen if you just started an object and you skip it immediately. + // Performance note: it would be nice to get rid of this check as it is somewhat + // expensive. + // https://github.com/simdjson/simdjson/issues/1742 + logger::log_value(*this, "key"); + return_current_and_advance(); // eat up the ':' + break; // important!!! 
+ } + simdjson_fallthrough; + // Anything else must be a scalar value + default: + // For the first scalar, we will have incremented depth already, so we decrement it here. + logger::log_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + } + + // Now that we've considered the first value, we only increment/decrement for arrays/objects + while (position() < end_position()) { + switch (*return_current_and_advance()) { + case '[': case '{': + logger::log_start_value(*this, "skip"); + _depth++; + break; + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + default: + logger::log_value(*this, "skip", ""); + break; + } + } + + return report_error(TAPE_ERROR, "not enough close braces"); +} + +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool json_iterator::at_root() const noexcept { + return position() == root_position(); +} + +simdjson_inline bool json_iterator::is_single_token() const noexcept { + return parser->implementation->n_structural_indexes == 1; +} + +simdjson_inline bool json_iterator::streaming() const noexcept { + return _streaming; +} + +simdjson_inline token_position json_iterator::root_position() const noexcept { + return _root; +} + +simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +} + +simdjson_inline void json_iterator::assert_at_root() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + // Under Visual Studio, the next 
SIMDJSON_ASSUME fails with: the argument + // has side effects that will be discarded. + SIMDJSON_ASSUME( token.position() == _root ); +#endif +} + +simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { + assert_valid_position(token._position + required_tokens - 1); +} + +simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); + SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#endif +} + +simdjson_inline bool json_iterator::at_end() const noexcept { + return position() == end_position(); +} +simdjson_inline token_position json_iterator::end_position() const noexcept { + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + return &parser->implementation->structural_indexes[n_structural_indexes]; +} + +inline std::string json_iterator::to_string() const noexcept { + if( !is_alive() ) { return "dead json_iterator instance"; } + const char * current_structural = reinterpret_cast(token.peek()); + return std::string("json_iterator [ depth : ") + std::to_string(_depth) + + std::string(", structural : '") + std::string(current_structural,1) + + std::string("', offset : ") + std::to_string(token.current_offset()) + + std::string("', error : ") + error_message(error) + + std::string(" ]"); +} + +inline simdjson_result json_iterator::current_location() const noexcept { + if (!is_alive()) { // Unrecoverable error + if (!at_root()) { + return reinterpret_cast(token.peek(-1)); + } else { + return reinterpret_cast(token.peek()); + } + } + if (at_end()) { + return OUT_OF_BOUNDS; + } + return reinterpret_cast(token.peek()); +} + +simdjson_inline bool json_iterator::is_alive() const noexcept { + return parser; +} + +simdjson_inline void json_iterator::abandon() noexcept { + parser 
= nullptr; + _depth = 0; +} + +simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif // SIMDJSON_CHECK_EOF + return token.return_current_and_advance(); +} + +simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { + // deliberately done without safety guard: + return token.peek(); +} + +simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // SIMDJSON_CHECK_EOF + return token.peek(delta); +} + +simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // #if SIMDJSON_CHECK_EOF + return token.peek_length(delta); +} + +simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { + // todo: currently we require end-of-string buffering, but the following + // assert_valid_position should be turned on if/when we lift that condition. + // assert_valid_position(position); + // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. + return token.peek(position); +} + +simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_length(position); +} +simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_root_length(position); +} + +simdjson_inline token_position json_iterator::last_position() const noexcept { + // The following line fails under some compilers... 
+ // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); + // since it has side-effects. + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + SIMDJSON_ASSUME(n_structural_indexes > 0); + return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +} +simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { + return token.peek(last_position()); +} + +simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { + SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); + SIMDJSON_ASSUME(_depth == parent_depth + 1); + _depth = parent_depth; +} + +simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); + _depth = child_depth; +} + +simdjson_inline depth_t json_iterator::depth() const noexcept { + return _depth; +} + +simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { + return _string_buf_loc; +} + +simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); + logger::log_error(*this, message); + error = _error; + return error; +} + +simdjson_inline token_position json_iterator::position() const noexcept { + return token.position(); +} + +simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { + return parser->unescape(in, _string_buf_loc, allow_replacement); +} + +simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { + return parser->unescape_wobbly(in, _string_buf_loc); +} + +simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + 
SIMDJSON_ASSUME(_depth == child_depth - 1); +#if SIMDJSON_DEVELOPMENT_CHECKS +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); + SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); +#endif +#endif + token.set_position(position); + _depth = child_depth; +} + +simdjson_inline error_code json_iterator::consume_character(char c) noexcept { + if (*peek() == c) { + return_current_and_advance(); + return SUCCESS; + } + return TAPE_ERROR; +} + +#if SIMDJSON_DEVELOPMENT_CHECKS + +simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; +} + +simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } +} + +#endif + + +simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); + logger::log_error(*this, message); + return _error; +} + + +simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { + // This function is not expected to be called in performance-sensitive settings. + // Let us guard against silly cases: + if((N < max_len) || (N == 0)) { return false; } + // Copy to the buffer. + std::memcpy(tmpbuf, json, max_len); + if(N > max_len) { // We pad whatever remains with ' '. 
+ std::memset(tmpbuf + max_len, ' ', N - max_len); + } + return true; +} + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/json_iterator-inl.h for icelake */ +/* including simdjson/generic/ondemand/json_type-inl.h for icelake: #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* begin file simdjson/generic/ondemand/json_type-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { + switch (type) { + case json_type::array: out << "array"; break; + case json_type::object: out << "object"; break; + case json_type::number: out << "number"; break; + case json_type::string: out << "string"; break; + case json_type::boolean: out << "boolean"; break; + case json_type::null: out << "null"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& 
operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { + return out << type.value(); +} +#endif + + + +simdjson_inline number_type number::get_number_type() const noexcept { + return type; +} + +simdjson_inline bool number::is_uint64() const noexcept { + return get_number_type() == number_type::unsigned_integer; +} + +simdjson_inline uint64_t number::get_uint64() const noexcept { + return payload.unsigned_integer; +} + +simdjson_inline number::operator uint64_t() const noexcept { + return get_uint64(); +} + +simdjson_inline bool number::is_int64() const noexcept { + return get_number_type() == number_type::signed_integer; +} + +simdjson_inline int64_t number::get_int64() const noexcept { + return payload.signed_integer; +} + +simdjson_inline number::operator int64_t() const noexcept { + return get_int64(); +} + +simdjson_inline bool number::is_double() const noexcept { + return get_number_type() == number_type::floating_point_number; +} + +simdjson_inline double number::get_double() const noexcept { + return payload.floating_point_number; +} + +simdjson_inline number::operator double() const noexcept { + return get_double(); +} + +simdjson_inline double number::as_double() const noexcept { + if(is_double()) { + return payload.floating_point_number; + } + if(is_int64()) { + return double(payload.signed_integer); + } + return double(payload.unsigned_integer); +} + +simdjson_inline void number::append_s64(int64_t value) noexcept { + payload.signed_integer = value; + type = number_type::signed_integer; +} + +simdjson_inline void number::append_u64(uint64_t value) noexcept { + payload.unsigned_integer = value; + type = number_type::unsigned_integer; +} + +simdjson_inline void number::append_double(double value) noexcept { + payload.floating_point_number = value; + type = number_type::floating_point_number; +} + +simdjson_inline void number::skip_double() noexcept { + type = number_type::floating_point_number; +} + +} // namespace ondemand +} // namespace 
icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +/* end file simdjson/generic/ondemand/json_type-inl.h for icelake */ +/* including simdjson/generic/ondemand/logger-inl.h for icelake: #include "simdjson/generic/ondemand/logger-inl.h" */ +/* begin file simdjson/generic/ondemand/logger-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace icelake { +namespace ondemand { +namespace logger { + +static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +static constexpr const int LOG_EVENT_LEN = 20; +static constexpr const int LOG_BUFFER_LEN = 30; +static constexpr const int LOG_SMALL_BUFFER_LEN = 10; +static int log_depth = 0; // Not threadsafe. Log only. 
+ +// Helper to turn unprintable or newline characters into spaces +static inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } +} + +template +static inline std::string string_format(const std::string& format, const Args&... args) +{ + SIMDJSON_PUSH_DISABLE_ALL_WARNINGS + int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; + auto size = static_cast(size_s); + if (size <= 0) return std::string(); + std::unique_ptr buf(new char[size]); + std::snprintf(buf.get(), size, format.c_str(), args...); + SIMDJSON_POP_DISABLE_WARNINGS + return std::string(buf.get(), buf.get() + size - 1); +} + +static inline log_level get_log_level_from_env() +{ + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + char *lvl = getenv("SIMDJSON_LOG_LEVEL"); + SIMDJSON_POP_DISABLE_WARNINGS + if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } + return log_level::info; +} + +static inline log_level log_threshold() +{ + static log_level threshold = get_log_level_from_env(); + return threshold; +} + +static inline bool should_log(log_level level) +{ + return level >= log_threshold(); +} + +inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +} + +inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "", type, detail, log_level::info); +} +inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +} + +inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, 
std::string_view detail) noexcept { + log_line(iter, index, depth, "+", type, detail, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} +inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_line(iter, "+", type, "", delta, depth_delta, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} + +inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + if (LOG_ENABLED) { log_depth--; } + log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +} + +inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); +} +inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { + log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); +} + +inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_event(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_value(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_start_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_end_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_error(iter.json_iter(), error, detail, delta, depth_delta); +} + +inline void 
log_headers() noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(log_level::info))) { + // Technically a static variable is not thread-safe, but if you are using threads and logging... well... + static bool displayed_hint{false}; + log_depth = 0; + printf("\n"); + if (!displayed_hint) { + // We only print this helpful header once. + printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); + printf("# +array says 'this is where we were when we discovered the start array'\n"); + printf( + "# -array says 'this is where we were when we ended the array'\n"); + printf("# skip says 'this is a structural or value I am skipping'\n"); + printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); + printf("#\n"); + printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); + printf("# in addition to the depth being displayed.\n"); + printf("#\n"); + printf("# Every token in the document has a single depth determined by the tokens before it,\n"); + printf("# and is not affected by what the token actually is.\n"); + printf("#\n"); + printf("# Not all structural elements are presented as tokens in the logs.\n"); + printf("#\n"); + printf("# We never give control to the user within an empty array or an empty object.\n"); + printf("#\n"); + printf("# Inside an array, having a depth greater than the array's depth means that\n"); + printf("# we are pointing inside a value.\n"); + printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); + printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); + displayed_hint = true; + } + printf("\n"); + printf("| %-*s ", LOG_EVENT_LEN, "Event"); + printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); + printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); + // printf("| %-*s ", 5, "Next#"); + printf("| %-*s ", 5, "Depth"); + printf("| Detail "); + printf("|\n"); + + printf("|%.*s", 
LOG_EVENT_LEN + 2, DASHES); + printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); + printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); + // printf("|%.*s", 5+2, DASHES); + printf("|%.*s", 5 + 2, DASHES); + printf("|--------"); + printf("|\n"); + fflush(stdout); + } + } +} + +template +inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { + log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); +} + +template +inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(level))) { + const int indent = depth * 2; + const auto buf = iter.token.buf; + auto msg = string_format(title, std::forward(args)...); + printf("| %*s%s%-*s ", indent, "", title_prefix, + LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); + { + // Print the current structural. + printf("| "); + // Before we begin, the index might point right before the document. + // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 + if (index < iter._root) { + printf("%*s", LOG_BUFFER_LEN, ""); + } else { + auto current_structural = &buf[*index]; + for (int i = 0; i < LOG_BUFFER_LEN; i++) { + printf("%c", printable_char(current_structural[i])); + } + } + printf(" "); + } + { + // Print the next structural. 
+ printf("| "); + auto next_structural = &buf[*(index + 1)]; + for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { + printf("%c", printable_char(next_structural[i])); + } + printf(" "); + } + // printf("| %5u ", *(index+1)); + printf("| %5i ", depth); + printf("| %6.*s ", int(detail.size()), detail.data()); + printf("|\n"); + fflush(stdout); + } + } +} + +} // namespace logger +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +/* end file simdjson/generic/ondemand/logger-inl.h for icelake */ +/* including simdjson/generic/ondemand/object-inl.h for icelake: #include "simdjson/generic/ondemand/object-inl.h" */ +/* begin file simdjson/generic/ondemand/object-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + 
logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { + return find_field_unordered(key); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { + return std::forward(*this).find_field_unordered(key); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} + +simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_object().error() ); + return object(iter); +} +simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_root_object().error() ); 
+ return object(iter); +} +simdjson_inline error_code object::consume() noexcept { + if(iter.is_at_key()) { + /** + * whenever you are pointing at a key, calling skip_child() is + * unsafe because you will hit a string and you will assume that + * it is string value, and this mistake will lead you to make bad + * depth computation. + */ + /** + * We want to 'consume' the key. We could really + * just do _json_iter->return_current_and_advance(); at this + * point, but, for clarity, we will use the high-level API to + * eat the key. We assume that the compiler optimizes away + * most of the work. + */ + simdjson_unused raw_json_string actual_key; + auto error = iter.field_key().get(actual_key); + if (error) { iter.abandon(); return error; }; + // Let us move to the value while we are at it. + if ((error = iter.field_value())) { iter.abandon(); return error; } + } + auto error_skip = iter.json_iter().skip_child(iter.depth()-1); + if(error_skip) { iter.abandon(); } + return error_skip; +} + +simdjson_inline simdjson_result object::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.started_object().error() ); + return object(iter); +} + +simdjson_inline object object::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline object::object(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result object::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return object_iterator(iter); +} +simdjson_inline simdjson_result object::end() noexcept { + return object_iterator(iter); +} + +inline 
simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = find_field(unescaped); // Take note find_field does not unescape keys when matching + } else { + child = find_field(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; +} + +inline simdjson_result object::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { + return INVALID_JSON_POINTER; + } + return at_pointer(json_pointer); +} + +simdjson_inline simdjson_result object::count_fields() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. 
+ if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the object after counting the number of elements. + iter.reset_object(); + return count; +} + +simdjson_inline simdjson_result object::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_object().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +simdjson_inline simdjson_result object::reset() & noexcept { + return iter.reset_object(); +} + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first)[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return 
error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +inline simdjson_result simdjson_result::reset() noexcept { + if (error()) { return error(); } + return first.reset(); +} + +inline simdjson_result simdjson_result::is_empty() noexcept { + if (error()) { return error(); } + return first.is_empty(); +} + +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +/* end file simdjson/generic/ondemand/object-inl.h for icelake */ +/* including simdjson/generic/ondemand/object_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/object_iterator-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/ondemand/field-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +// +// object_iterator +// + +simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result object_iterator::operator*() noexcept { + error_code error = iter.error(); + if (error) { iter.abandon(); return error; } + auto result = field::start(iter); + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (result.error()) { iter.abandon(); } + return result; +} +simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { + return iter.is_open(); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline object_iterator &object_iterator::operator++() noexcept { + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error + + simdjson_unused error_code error; + if ((error = iter.skip_child() )) { return *this; } + + simdjson_unused bool has_value; + if ((error = iter.has_next_field().get(has_value) )) { return *this; }; + return *this; +} +SIMDJSON_POP_DISABLE_WARNINGS + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter.depth. at_start may vary. 
Error is +// always SUCCESS: +// +// - Start: This is the state when the object is first found and the iterator is just past the {. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the , or } before the next value. In this state, +// depth == iter.depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter.depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the object iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an +// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter.depth == depth, and at_start == false. +// +// Errors that occur while reading a field to give to the user (such as when the key is not a +// string or the field is missing a colon) are yielded immediately. Depth is then decremented, +// moving to the Finished state without transitioning through an Error state at all. +// +// ## Terminal State +// +// The terminal state has iter.depth < depth. at_start is always false. 
+// +// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. +// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. +// + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + icelake::ondemand::object_iterator &&value +) noexcept + : implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +// If we're iterating and there is an error, return the error once. 
+simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +// Advances to the next field: the wrapped object_iterator looks for ',' (more fields) or '}' (end of object). +// A pending error is cleared instead of advancing, so that it is yielded exactly once. +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++first; + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/object_iterator-inl.h for icelake */ +/* including simdjson/generic/ondemand/parser-inl.h for icelake: #include "simdjson/generic/ondemand/parser-inl.h" */ +/* begin file simdjson/generic/ondemand/parser-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +simdjson_inline 
parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity} { +} + +simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + if (new_capacity > max_capacity()) { return CAPACITY; } + if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + + // string_capacity copied from document::allocate + _capacity = 0; + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); +#if SIMDJSON_DEVELOPMENT_CHECKS + start_positions.reset(new (std::nothrow) token_position[new_max_depth]); +#endif + if (implementation) { + SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); + SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + } else { + SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); + } + _capacity = new_capacity; + _max_depth = new_max_depth; + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. 
+ SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return document::start({ reinterpret_cast(json.data()), this }); +} + +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; + } + return document::start({ reinterpret_cast(json.data()), this, true }); +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { + return iterate(padded_string_view(json, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { + if(json.capacity() - json.size() < SIMDJSON_PADDING) { + json.reserve(json.size() + SIMDJSON_PADDING); + } + return iterate(padded_string_view(json)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { + return iterate(padded_string_view(json)); +} + +simdjson_warn_unused simdjson_inline simdjson_result 
parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + padded_string_view json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + const padded_string &json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length()) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return json_iterator(reinterpret_cast(json.data()), this); +} + +inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { + buf += 3; + len -= 3; + } + if(allow_comma_separated && batch_size < len) { batch_size = len; } + return document_stream(*this, buf, len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return 
iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} + +simdjson_inline size_t parser::capacity() const noexcept { + return _capacity; +} +simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_inline size_t parser::max_depth() const noexcept { + return _max_depth; +} + +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; + } +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { + uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { + uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H +/* end file simdjson/generic/ondemand/parser-inl.h for icelake */ 
+/* including simdjson/generic/ondemand/raw_json_string-inl.h for icelake: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +namespace icelake { +namespace ondemand { + +simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} + +simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } + + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;pos < target.size() && target[pos] != '\\';pos++) {} + // slow path may begin. + bool escaping{false}; + for(;pos < target.size();pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;target[pos] && target[pos] != '\\';pos++) {} + // slow path may begin. 
+ bool escaping{false}; + for(;target[pos];pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { + // If we are going to call memcmp, then we must know something about the length of the raw_json_string. + return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); +} + +simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + if(target.size() <= SIMDJSON_PADDING) { + return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + } + const char * r{raw()}; + size_t pos{0}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. 
+ return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { + // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. 
+ return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { + return a.unsafe_is_equal(c); +} + +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { + return a == c; +} + +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { + return !(a == c); +} + +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { + return !(a == c); +} + + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { + return iter.unescape(*this, allow_replacement); +} + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { + return iter.unescape_wobbly(*this); +} + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { + bool in_escape = false; + const char *s = str.raw(); + while (true) { + switch (*s) { + case '\\': in_escape = !in_escape; break; + case '"': if (in_escape) { in_escape = false; } else { return out; } break; + default: if (in_escape) { in_escape = false; } + } + out << *s; + s++; + } +} + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { + if (error()) { return error(); } + return 
first.raw(); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(icelake::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { + if (error()) { return error(); } + return first.unescape(iter, allow_replacement); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(icelake::ondemand::json_iterator &iter) const noexcept { + if (error()) { return error(); } + return first.unescape_wobbly(iter); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H +/* end file simdjson/generic/ondemand/raw_json_string-inl.h for icelake */ +/* including simdjson/generic/ondemand/serialization-inl.h for icelake: #include "simdjson/generic/ondemand/serialization-inl.h" */ +/* begin file simdjson/generic/ondemand/serialization-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +inline std::string_view trim(const std::string_view str) noexcept { + // We can almost surely do better by rolling our own find_first_not_of function. 
+ size_t first = str.find_first_not_of(" \t\n\r"); + // If we have the empty string (just white space), then no trimming is possible, and + // we return the empty string_view. + if (std::string_view::npos == first) { return std::string_view(); } + size_t last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, (last - first + 1)); +} + + +inline simdjson_result to_json_string(icelake::ondemand::document& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(icelake::ondemand::document_reference& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(icelake::ondemand::value& x) noexcept { + /** + * If we somehow receive a value that has already been consumed, + * then the following code could be in trouble. E.g., we create + * an array as needed, but if an array was already created, then + * it could be bad. 
+ */ + using namespace icelake::ondemand; + icelake::ondemand::json_type t; + auto error = x.type().get(t); + if(error != SUCCESS) { return error; } + switch (t) + { + case json_type::array: + { + icelake::ondemand::array array; + error = x.get_array().get(array); + if(error) { return error; } + return to_json_string(array); + } + case json_type::object: + { + icelake::ondemand::object object; + error = x.get_object().get(object); + if(error) { return error; } + return to_json_string(object); + } + default: + return trim(x.raw_json_token()); + } +} + +inline simdjson_result to_json_string(icelake::ondemand::object& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(icelake::ondemand::array& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} +} // namespace simdjson + +namespace simdjson { namespace icelake { namespace ondemand { + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out 
<< v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::document_reference& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, 
simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif +}}} // namespace simdjson::icelake::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +/* end file simdjson/generic/ondemand/serialization-inl.h for icelake */ +/* including simdjson/generic/ondemand/token_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/token_iterator-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define 
SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +simdjson_inline token_iterator::token_iterator( + const uint8_t *_buf, + token_position position +) noexcept : buf{_buf}, _position{position} +{ +} + +simdjson_inline uint32_t token_iterator::current_offset() const noexcept { + return *(_position); +} + + +simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { + return &buf[*(_position++)]; +} + +simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; +} +simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; +} +simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { + return *(position+1) - *position; +} + +simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { + return *(position+2) - *(position) > *(position+1) - *(position) ? 
+ *(position+1) - *(position) + : *(position+2) - *(position); +} +simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(_position+delta)]; +} +simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(_position+delta); +} +simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + return *(_position+delta+1) - *(_position+delta); +} + +simdjson_inline token_position token_iterator::position() const noexcept { + return _position; +} +simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; +} + +simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return _position == other._position; +} +simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return _position != other._position; +} +simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return _position > other._position; +} +simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return _position >= other._position; +} +simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return _position < other._position; +} +simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return _position <= other._position; +} + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H +/* end file 
simdjson/generic/ondemand/token_iterator-inl.h for icelake */ +/* including simdjson/generic/ondemand/value-inl.h for icelake: #include "simdjson/generic/ondemand/value-inl.h" */ +/* begin file simdjson/generic/ondemand/value-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +simdjson_inline value::value(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} +simdjson_inline value value::start(const value_iterator &iter) noexcept { + return iter; +} +simdjson_inline value value::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline simdjson_result value::get_array() noexcept { + return array::start(iter); +} +simdjson_inline simdjson_result 
value::get_object() noexcept { + return object::start(iter); +} +simdjson_inline simdjson_result value::start_or_resume_object() noexcept { + if (iter.at_start()) { + return get_object(); + } else { + return object::resume(iter); + } +} + +simdjson_inline simdjson_result value::get_raw_json_string() noexcept { + return iter.get_raw_json_string(); +} +simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { + return iter.get_string(allow_replacement); +} +template +simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { + return iter.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result value::get_wobbly_string() noexcept { + return iter.get_wobbly_string(); +} +simdjson_inline simdjson_result value::get_double() noexcept { + return iter.get_double(); +} +simdjson_inline simdjson_result value::get_double_in_string() noexcept { + return iter.get_double_in_string(); +} +simdjson_inline simdjson_result value::get_uint64() noexcept { + return iter.get_uint64(); +} +simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { + return iter.get_uint64_in_string(); +} +simdjson_inline simdjson_result value::get_int64() noexcept { + return iter.get_int64(); +} +simdjson_inline simdjson_result value::get_int64_in_string() noexcept { + return iter.get_int64_in_string(); +} +simdjson_inline simdjson_result value::get_bool() noexcept { + return iter.get_bool(); +} +simdjson_inline simdjson_result value::is_null() noexcept { + return iter.is_null(); +} +template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result 
value::get() noexcept { return get_number(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } + +template simdjson_inline error_code value::get(T &out) noexcept { + return get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline value::operator T() noexcept(false) { + return get(); +} +simdjson_inline value::operator array() noexcept(false) { + return get_array(); +} +simdjson_inline value::operator object() noexcept(false) { + return get_object(); +} +simdjson_inline value::operator uint64_t() noexcept(false) { + return get_uint64(); +} +simdjson_inline value::operator int64_t() noexcept(false) { + return get_int64(); +} +simdjson_inline value::operator double() noexcept(false) { + return get_double(); +} +simdjson_inline value::operator std::string_view() noexcept(false) { + return get_string(false); +} +simdjson_inline value::operator raw_json_string() noexcept(false) { + return get_raw_json_string(); +} +simdjson_inline value::operator bool() noexcept(false) { + return get_bool(); +} +#endif + +simdjson_inline simdjson_result value::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result value::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result value::count_elements() & noexcept { + simdjson_result answer; + auto a = get_array(); + answer = a.count_elements(); + // count_elements leaves you pointing inside the array, at the first element. + // We need to move back so that the user can create a new array (which requires that + // we point at '['). 
+ iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::count_fields() & noexcept { + simdjson_result answer; + auto a = get_object(); + answer = a.count_fields(); + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::at(size_t index) noexcept { + auto a = get_array(); + return a.at(index); +} + +simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result value::find_field(const char *key) noexcept { + return start_or_resume_object().find_field(key); +} + +simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} + +simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result value::operator[](const char *key) noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline simdjson_result value::type() noexcept { + return iter.type(); +} + +simdjson_inline simdjson_result value::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! 
((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline simdjson_result value::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + + +simdjson_inline bool value::is_negative() noexcept { + return iter.is_negative(); +} + +simdjson_inline simdjson_result value::is_integer() noexcept { + return iter.is_integer(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { + return iter.get_number_type(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { + return iter.get_number(); +} + +simdjson_inline std::string_view value::raw_json_token() noexcept { + return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); +} + +simdjson_inline simdjson_result value::raw_json() noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: { + ondemand::array array; + SIMDJSON_TRY(get_array().get(array)); + return array.raw_json(); + } + case json_type::object: { + ondemand::object object; + SIMDJSON_TRY(get_object().get(object)); + return object.raw_json(); + } + default: + return raw_json_token(); + } +} + +simdjson_inline simdjson_result value::current_location() noexcept { + return iter.json_iter().current_location(); +} + +simdjson_inline int32_t value::current_depth() const noexcept{ + return iter.json_iter().depth(); +} + +inline bool is_pointer_well_formed(std::string_view json_pointer) noexcept { + if (simdjson_unlikely(json_pointer.empty())) { // can't be + return false; + } + if (simdjson_unlikely(json_pointer[0] != '/')) { + return false; + } + size_t escape = json_pointer.find('~'); + if (escape == std::string_view::npos) { + return true; + } + if (escape == json_pointer.size() - 1) { + return false; + } + if (json_pointer[escape + 1] != '0' && json_pointer[escape + 1] != '1') { + 
return false; + } + return true; +} + +simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + // a non-empty string can be invalid, or accessing a primitive (issue 2154) + if (is_pointer_well_formed(json_pointer)) { + return NO_SUCH_FIELD; + } + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result value::at_path(std::string_view json_path) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + icelake::ondemand::value &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + if (error()) { return error(); } + return 
{}; +} + +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} + +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { + if (error()) { return error(); } + return first[key]; +} + +simdjson_inline simdjson_result simdjson_result::get_array() noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return 
first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return first.get(); +} +template simdjson_inline error_code simdjson_result::get(T &out) noexcept { + if (error()) { return error(); } + return first.get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return std::move(first); +} +template<> simdjson_inline error_code simdjson_result::get(icelake::ondemand::value &out) noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return 
error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return static_cast(first); +} +simdjson_inline simdjson_result::operator icelake::ondemand::array() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator icelake::ondemand::object() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator icelake::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} 
+simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer( + std::string_view json_pointer) noexcept { + if (error()) { + return error(); + } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H +/* end file simdjson/generic/ondemand/value-inl.h for icelake */ +/* including simdjson/generic/ondemand/value_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/value_iterator-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped 
(editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +simdjson_inline value_iterator::value_iterator( + json_iterator *json_iter, + depth_t depth, + token_position start_position +) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} +{ +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_root_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { + assert_at_container_start(); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + if (*_json_iter->peek() == '}') { + logger::log_value(*_json_iter, "empty object"); + _json_iter->return_current_and_advance(); + end_container(); + return false; + } + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! 
_json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should + // call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != '}') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); + } + // If the last character is } *and* the first gibberish character is also '}' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed object. 
+ return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { + auto error = check_root_object(); + if(error) { return error; } + return started_object(); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { +#if SIMDJSON_CHECK_EOF + if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } + // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + _json_iter->ascend_to(depth()-1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { + assert_at_next(); + + // It's illegal to call this unless there are more tokens: anything that ends in } or ] is + // obligated to verify there are more tokens if they are not the top level. + switch (*_json_iter->return_current_and_advance()) { + case '}': + logger::log_end_value(*_json_iter, "object"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between object fields"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { + error_code error; + bool has_value; + // + // Initially, the object can be in one of a few different places: + // + // 1. The start of the object, at the first field: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + if (at_first_field()) { + has_value = true; + + // + // 2. 
When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + return false; + + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + if ((error = skip_child() )) { abandon(); return error; } + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + while (has_value) { + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. 
+ if ((error = field_value() )) { abandon(); return error; } + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + //if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); // Skip the value entirely + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + + // If the loop ended, we're out of fields to look at. 
+ return false; +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { + /** + * When find_field_unordered_raw is called, we can either be pointing at the + * first key, pointing outside (at the closing brace) or if a key was matched + * we can be either pointing right afterthe ':' right before the value (that we need skip), + * or we may have consumed the value and we might be at a comma or at the + * final brace (ready for a call to has_next_field()). + */ + error_code error; + bool has_value; + + // First, we scan from that point to the end. + // If we don't find a match, we may loop back around, and scan from the beginning to that point. + token_position search_start = _json_iter->position(); + + // We want to know whether we need to go back to the beginning. + bool at_first = at_first_field(); + /////////////// + // Initially, the object can be in one of a few different places: + // + // 1. At the first key: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + // + if (at_first) { + has_value = true; + + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { + +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + SIMDJSON_TRY(reset_object().get(has_value)); + at_first = true; + // 3. 
When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + // If someone queried a key but they not did access the value, then we are left pointing + // at the ':' and we need to move forward through the value... If the value was + // processed then skip_child() does not move the iterator (but may adjust the depth). + if ((error = skip_child() )) { abandon(); return error; } + search_start = _json_iter->position(); + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + + // After initial processing, we will be in one of two states: + // + // ``` + // // At the beginning of a field + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // At the end of the object + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // ``` + // + // Next, we find a match starting from the current position. + while (has_value) { + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). 
+ if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + // Performance note: it maybe wasteful to rewind to the beginning when there might be + // no other query following. Indeed, it would require reskipping the whole object. + // Instead, you can just stay where you are. If there is a new query, there is always time + // to rewind. + if(at_first) { return false; } + + // If we reach the end without finding a match, search the rest of the fields starting at the + // beginning of the object. 
+ // (We have already run through the object before, so we've already validated its structure. We + // don't check errors in this bit.) + SIMDJSON_TRY(reset_object().get(has_value)); + while (true) { + SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + error = field_value(); SIMDJSON_ASSUME(!error); + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). 
+ SIMDJSON_TRY( skip_child() ); + // If we reached the end of the key-value pair we started from, then we know + // that the key is not there so we return false. We are either right before + // the next comma or the final brace. + if(_json_iter->position() == search_start) { return false; } + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); + // If we make the mistake of exiting here, then we could be left pointing at a key + // in the middle of an object. That's not an allowable state. + } + // If the loop ended, we're out of fields to look at. The program should + // never reach this point. + return false; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { + assert_at_next(); + + const uint8_t *key = _json_iter->return_current_and_advance(); + if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } + return raw_json_string(key); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { + assert_at_next(); + + if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } + _json_iter->descend_to(depth()+1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_root_array(); +} + +inline std::string value_iterator::to_string() const noexcept { + auto answer = 
std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); + if(_json_iter != nullptr) { answer += _json_iter->to_string(); } + answer += std::string(" ]"); + return answer; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { + assert_at_container_start(); + if (*_json_iter->peek() == ']') { + logger::log_value(*_json_iter, "empty array"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY( end_container() ); + return false; + } + _json_iter->descend_to(depth()+1); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should + // also call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != ']') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); + } + // If the last character is ] *and* the first gibberish character is also ']' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. 
+ if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed array. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { + auto error = check_root_array(); + if (error) { return error; } + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { + assert_at_next(); + + logger::log_event(*this, "has_next_element"); + switch (*_json_iter->return_current_and_advance()) { + case ']': + logger::log_end_value(*_json_iter, "array"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + _json_iter->descend_to(depth()+1); + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between array elements"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { + auto not_true = atomparsing::str4ncmp(json, "true"); + auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); + bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 
5 : 4]); + if (error) { return incorrect_type_error("Not a boolean"); } + return simdjson_result(!not_true); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { + bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); + // if we start with 'n', we must be a null + if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } + return is_null_string; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { + return get_raw_json_string().unescape(json_iter(), allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_string(allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { + return get_raw_json_string().unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { + auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { + auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return 
result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { + auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { + auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { + auto result = numberparsing::parse_double(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { + auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { + auto result = parse_bool(peek_non_root_scalar("bool")); + if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } + return result; +} +simdjson_inline simdjson_result value_iterator::is_null() noexcept { + bool is_null_value; + SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); + if(is_null_value) { advance_non_root_scalar("null"); } + return is_null_value; +} +simdjson_inline bool value_iterator::is_negative() noexcept { + return numberparsing::is_negative(peek_non_root_scalar("numbersign")); +} +simdjson_inline bool value_iterator::is_root_negative() noexcept { + return numberparsing::is_negative(peek_root_scalar("numbersign")); +} +simdjson_inline simdjson_result value_iterator::is_integer() noexcept { + return 
numberparsing::is_integer(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { + return numberparsing::get_number_type(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number() noexcept { + number num; + error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); + if(error) { return error; } + return num; +} + +simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("is_root_integer"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + return false; // if there are more than 20 characters, it cannot be represented as an integer. + } + auto answer = numberparsing::is_integer(tmpbuf); + // If the parsing was a success, we must still check that it is + // a single scalar. Note that we parse first because of cases like '[]' where + // getting TRAILING_CONTENT is wrong. + if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } + return answer; +} + +simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return number_type::big_integer; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; + } + auto answer = numberparsing::get_number_type(tmpbuf); + if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + return answer; +} +simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return BIGINT_ERROR; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; + } + number num; + error_code error = numberparsing::parse_number(tmpbuf, num); + if(error) { return error; } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("number"); + return num; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { + return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_root_string(check_trailing, allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { + return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) 
noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("bool"); + uint8_t tmpbuf[5+1+1]; // +1 for null termination + tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } + auto result = parse_bool(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + } + return result; +} +simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("null"); + bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + if(result) { // we have something that looks like a null. + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("null"); + } + return result; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); + + return _json_iter->skip_child(depth()); +} + +simdjson_inline value_iterator value_iterator::child() const noexcept { + assert_at_child(); + return { _json_iter, depth()+1, _json_iter->token.position() }; +} + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is +// marked non-inline. 
+SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline bool value_iterator::is_open() const noexcept { + return _json_iter->depth() >= depth(); +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool value_iterator::at_end() const noexcept { + return _json_iter->at_end(); +} + +simdjson_inline bool value_iterator::at_start() const noexcept { + return _json_iter->token.position() == start_position(); +} + +simdjson_inline bool value_iterator::at_first_field() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + return _json_iter->token.position() == start_position() + 1; +} + +simdjson_inline void value_iterator::abandon() noexcept { + _json_iter->abandon(); +} + +simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { + return _depth; +} +simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { + return _json_iter->error; +} +simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { + return _json_iter->string_buf_loc(); +} +simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { + return *_json_iter; +} +simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { + return *_json_iter; +} + +simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { + return _json_iter->peek(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { + return _json_iter->peek_length(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { + return _json_iter->peek_root_length(start_position()); +} + +simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. 
+ if (!is_at_start()) { return peek_start(); } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return; } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { + logger::log_start_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + const uint8_t *json; + if (!is_at_start()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + json = peek_start(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + } else { + assert_at_start(); + /** + * We should be prudent. Let us peek. If it is not the right type, we + * return an error. Only once we have determined that we have the right + * type are we allowed to advance! 
+ */ + json = _json_iter->peek(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + _json_iter->return_current_and_advance(); + } + + + return SUCCESS; +} + + +simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_root(); + return _json_iter->peek(); +} +simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_non_root_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_root(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} +simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_non_root_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { + logger::log_error(*_json_iter, start_position(), depth(), message); + return INCORRECT_TYPE; +} + +simdjson_inline bool value_iterator::is_at_start() const noexcept { + return position() == start_position(); +} + +simdjson_inline bool value_iterator::is_at_key() const noexcept { + // Keys are at the same depth as the object. + // Note here that we could be safer and check that we are within an object, + // but we do not. 
+ return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; +} + +simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { + // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). + auto delta = position() - start_position(); + return delta == 1 || delta == 2; +} + +inline void value_iterator::assert_at_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_container_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_next() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +simdjson_inline void value_iterator::move_at_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position); +} + +simdjson_inline void value_iterator::move_at_container_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position + 1); +} + +simdjson_inline simdjson_result value_iterator::reset_array() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_array(); +} + +simdjson_inline simdjson_result value_iterator::reset_object() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_object(); +} + +inline void value_iterator::assert_at_child() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_root() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( 
_depth == 1 ); +} + +inline void value_iterator::assert_at_non_root_start() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth > 1 ); +} + +inline void value_iterator::assert_is_valid() const noexcept { + SIMDJSON_ASSUME( _json_iter != nullptr ); +} + +simdjson_inline bool value_iterator::is_valid() const noexcept { + return _json_iter != nullptr; +} + +simdjson_inline simdjson_result value_iterator::type() const noexcept { + switch (*peek_start()) { + case '{': + return json_type::object; + case '[': + return json_type::array; + case '"': + return json_type::string; + case 'n': + return json_type::null; + case 't': case 'f': + return json_type::boolean; + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return json_type::number; + default: + return TAPE_ERROR; + } +} + +simdjson_inline token_position value_iterator::start_position() const noexcept { + return _start_position; +} + +simdjson_inline token_position value_iterator::position() const noexcept { + return _json_iter->position(); +} + +simdjson_inline token_position value_iterator::end_position() const noexcept { + return _json_iter->end_position(); +} + +simdjson_inline token_position value_iterator::last_position() const noexcept { + return _json_iter->last_position(); +} + +simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { + return _json_iter->report_error(error, message); +} + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +/* end file 
simdjson/generic/ondemand/value_iterator-inl.h for icelake */ +/* end file simdjson/generic/ondemand/amalgamated.h for icelake */ +/* including simdjson/icelake/end.h: #include "simdjson/icelake/end.h" */ +/* begin file simdjson/icelake/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +SIMDJSON_UNTARGET_REGION +#endif + +/* undefining SIMDJSON_IMPLEMENTATION from "icelake" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/icelake/end.h */ + +#endif // SIMDJSON_ICELAKE_ONDEMAND_H +/* end file simdjson/icelake/ondemand.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(ppc64) +/* including simdjson/ppc64/ondemand.h: #include "simdjson/ppc64/ondemand.h" */ +/* begin file simdjson/ppc64/ondemand.h */ +#ifndef SIMDJSON_PPC64_ONDEMAND_H +#define SIMDJSON_PPC64_ONDEMAND_H + +/* including simdjson/ppc64/begin.h: #include "simdjson/ppc64/begin.h" */ +/* begin file simdjson/ppc64/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "ppc64" */ +#define SIMDJSON_IMPLEMENTATION ppc64 +/* including simdjson/ppc64/base.h: #include "simdjson/ppc64/base.h" */ +/* begin file simdjson/ppc64/base.h */ +#ifndef SIMDJSON_PPC64_BASE_H +#define SIMDJSON_PPC64_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Implementation for ALTIVEC (PPC64). 
+ */ +namespace ppc64 { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_BASE_H +/* end file simdjson/ppc64/base.h */ +/* including simdjson/ppc64/intrinsics.h: #include "simdjson/ppc64/intrinsics.h" */ +/* begin file simdjson/ppc64/intrinsics.h */ +#ifndef SIMDJSON_PPC64_INTRINSICS_H +#define SIMDJSON_PPC64_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +// These are defined by altivec.h in GCC toolchain, it is safe to undef them. +#ifdef bool +#undef bool +#endif + +#ifdef vector +#undef vector +#endif + +static_assert(sizeof(__vector unsigned char) <= simdjson::SIMDJSON_PADDING, "insufficient padding for ppc64"); + +#endif // SIMDJSON_PPC64_INTRINSICS_H +/* end file simdjson/ppc64/intrinsics.h */ +/* including simdjson/ppc64/bitmanipulation.h: #include "simdjson/ppc64/bitmanipulation.h" */ +/* begin file simdjson/ppc64/bitmanipulation.h */ +#ifndef SIMDJSON_PPC64_BITMANIPULATION_H +#define SIMDJSON_PPC64_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. 
+SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num - 1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). 
+ if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline int count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num); // Visual Studio wants two underscores +} +#else +simdjson_inline int count_ones(uint64_t input_num) { + return __builtin_popcountll(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + *result = value1 + value2; + return *result < value1; +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_BITMANIPULATION_H +/* end file simdjson/ppc64/bitmanipulation.h */ +/* including simdjson/ppc64/bitmask.h: #include "simdjson/ppc64/bitmask.h" */ +/* begin file simdjson/ppc64/bitmask.h */ +#ifndef SIMDJSON_PPC64_BITMASK_H +#define SIMDJSON_PPC64_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is +// encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + // You can use the version below, however gcc sometimes miscompiles + // vec_pmsum_be, it happens somewhere around between 8 and 9th version. + // The performance boost was not noticeable, falling back to a usual + // implementation. 
+ // __vector unsigned long long all_ones = {~0ull, ~0ull}; + // __vector unsigned long long mask = {bitmask, 0}; + // // Clang and GCC return different values for pmsum for ull so cast it to one. + // // Generally it is not specified by ALTIVEC ISA what is returned by + // // vec_pmsum_be. + // #if defined(__LITTLE_ENDIAN__) + // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[0]); + // #else + // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[1]); + // #endif + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif +/* end file simdjson/ppc64/bitmask.h */ +/* including simdjson/ppc64/numberparsing_defs.h: #include "simdjson/ppc64/numberparsing_defs.h" */ +/* begin file simdjson/ppc64/numberparsing_defs.h */ +#ifndef SIMDJSON_PPC64_NUMBERPARSING_DEFS_H +#define SIMDJSON_PPC64_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +#if defined(__linux__) +#include +#elif defined(__FreeBSD__) +#include +#endif + +namespace simdjson { +namespace ppc64 { +namespace numberparsing { + +// we don't have appropriate instructions, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); +#ifdef __BIG_ENDIAN__ 
+#if defined(__linux__) + val = bswap_64(val); +#elif defined(__FreeBSD__) + val = bswap64(val); +#endif +#endif + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emulate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace ppc64 +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_PPC64_NUMBERPARSING_DEFS_H +/* end file simdjson/ppc64/numberparsing_defs.h */ +/* including simdjson/ppc64/simd.h: #include "simdjson/ppc64/simd.h" */ +/* begin file simdjson/ppc64/simd.h */ +#ifndef SIMDJSON_PPC64_SIMD_H +#define SIMDJSON_PPC64_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace ppc64 { +namespace { +namespace simd { + +using __m128i = __vector unsigned char; + +template struct base { + __m128i value; + + // Zero
constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i &() const { + return this->value; + } + simdjson_inline operator __m128i &() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { + return vec_or(this->value, (__m128i)other); + } + simdjson_inline Child operator&(const Child other) const { + return vec_and(this->value, (__m128i)other); + } + simdjson_inline Child operator^(const Child other) const { + return vec_xor(this->value, (__m128i)other); + } + simdjson_inline Child bit_andnot(const Child other) const { + return vec_andc(this->value, (__m128i)other); + } + simdjson_inline Child &operator|=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast | other; + return *this_cast; + } + simdjson_inline Child &operator&=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast & other; + return *this_cast; + } + simdjson_inline Child &operator^=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast ^ other; + return *this_cast; + } +}; + +template > +struct base8 : base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { + return (__m128i)vec_cmpeq(lhs.value, (__m128i)rhs); + } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(simd8 prev_chunk) const { + __m128i chunk = this->value; +#ifdef __LITTLE_ENDIAN__ + chunk = (__m128i)vec_reve(this->value); + prev_chunk = (__m128i)vec_reve((__m128i)prev_chunk); +#endif + chunk = (__m128i)vec_sld((__m128i)prev_chunk, (__m128i)chunk, 16 - N); +#ifdef __LITTLE_ENDIAN__ + 
chunk = (__m128i)vec_reve((__m128i)chunk); +#endif + return chunk; + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd8 : base8 { + static simdjson_inline simd8 splat(bool _value) { + return (__m128i)vec_splats((unsigned char)(-(!!_value))); + } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) + : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) + : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { + __vector unsigned long long result; + const __m128i perm_mask = {0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40, + 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00}; + + result = ((__vector unsigned long long)vec_vbpermq((__m128i)this->value, + (__m128i)perm_mask)); +#ifdef __LITTLE_ENDIAN__ + return static_cast(result[1]); +#else + return static_cast(result[0]); +#endif + } + simdjson_inline bool any() const { + return !vec_all_eq(this->value, (__m128i)vec_splats(0)); + } + simdjson_inline simd8 operator~() const { + return this->value ^ (__m128i)splat(true); + } +}; + +template struct base8_numeric : base8 { + static simdjson_inline simd8 splat(T value) { + (void)value; + return (__m128i)vec_splats(value); + } + static simdjson_inline simd8 zero() { return splat(0); } + static simdjson_inline simd8 load(const T values[16]) { + return (__m128i)(vec_vsx_ld(0, reinterpret_cast(values))); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, + T v5, T v6, T v7, T v8, T v9, + T v10, T v11, T v12, T v13, + T v14, T v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) + : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { + vec_vsx_st(this->value, 0, 
reinterpret_cast<__m128i *>(dst)); + } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { + return (__m128i)((__m128i)this->value + (__m128i)other); + } + simdjson_inline simd8 operator-(const simd8 other) const { + return (__m128i)((__m128i)this->value - (__m128i)other); + } + simdjson_inline simd8 &operator+=(const simd8 other) { + *this = *this + other; + return *static_cast *>(this); + } + simdjson_inline simd8 &operator-=(const simd8 other) { + *this = *this - other; + return *static_cast *>(this); + } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior + // for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return (__m128i)vec_perm((__m128i)lookup_table, (__m128i)lookup_table, this->value); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted + // as a bitset). Passing a 0 value for mask would be equivalent to writing out + // every byte to output. Only the first 16 - count_ones(mask) bytes of the + // result are significant but 16 bytes get written. Design consideration: it + // seems like a function with the signature simd8 compress(uint32_t mask) + // would be sensible, but the AVX ISA makes this kind of approach difficult. 
+ template + simdjson_inline void compress(uint16_t mask, L *output) const { + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + using internal::thintable_epi8; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. +#ifdef __LITTLE_ENDIAN__ + __m128i shufmask = (__m128i)(__vector unsigned long long){ + thintable_epi8[mask1], thintable_epi8[mask2]}; +#else + __m128i shufmask = (__m128i)(__vector unsigned long long){ + thintable_epi8[mask2], thintable_epi8[mask1]}; + shufmask = (__m128i)vec_reve((__m128i)shufmask); +#endif + // we increment by 0x08 the second half of the mask + shufmask = ((__m128i)shufmask) + + ((__m128i)(__vector int){0, 0, 0x08080808, 0x08080808}); + + // this is the version "nearly pruned" + __m128i pruned = vec_perm(this->value, this->value, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. 
+ __m128i compactmask = + vec_vsx_ld(0, reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = vec_perm(pruned, (__m128i)vec_splats(0), compactmask); + vec_vsx_st(answer, 0, reinterpret_cast<__m128i *>(output)); + } + + template + simdjson_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); + } +}; + +// Signed bytes +template <> struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, + int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) + : simd8((__m128i)(__vector signed char){v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, + v15}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 + repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, + int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Order-sensitive comparisons + simdjson_inline simd8 + max_val(const simd8 other) const { + return (__m128i)vec_max((__vector signed 
char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + min_val(const simd8 other) const { + return (__m128i)vec_min((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + operator>(const simd8 other) const { + return (__m128i)vec_cmpgt((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + operator<(const simd8 other) const { + return (__m128i)vec_cmplt((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } +}; + +// Unsigned bytes +template <> struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) + : simd8((__m128i){v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 + repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, + uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, + uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, + uint8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Saturated math + simdjson_inline simd8 + saturating_add(const simd8 other) const { + return (__m128i)vec_adds(this->value, (__m128i)other); + } + simdjson_inline simd8 + saturating_sub(const simd8 other) const { + return (__m128i)vec_subs(this->value, 
(__m128i)other); + } + + // Order-specific operations + simdjson_inline simd8 + max_val(const simd8 other) const { + return (__m128i)vec_max(this->value, (__m128i)other); + } + simdjson_inline simd8 + min_val(const simd8 other) const { + return (__m128i)vec_min(this->value, (__m128i)other); + } + // Same as >, but only guarantees true is nonzero (> guarantees true = -1) + simdjson_inline simd8 + gt_bits(const simd8 other) const { + return this->saturating_sub(other); + } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 + lt_bits(const simd8 other) const { + return other.saturating_sub(*this); + } + simdjson_inline simd8 + operator<=(const simd8 other) const { + return other.max_val(*this) == other; + } + simdjson_inline simd8 + operator>=(const simd8 other) const { + return other.min_val(*this) == other; + } + simdjson_inline simd8 + operator>(const simd8 other) const { + return this->gt_bits(other).any_bits_set(); + } + simdjson_inline simd8 + operator<(const simd8 other) const { + return this->lt_bits(other).any_bits_set(); + } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { + return (__m128i)vec_cmpeq(this->value, (__m128i)vec_splats(uint8_t(0))); + } + simdjson_inline simd8 bits_not_set(simd8 bits) const { + return (*this & bits).bits_not_set(); + } + simdjson_inline simd8 any_bits_set() const { + return ~this->bits_not_set(); + } + simdjson_inline simd8 any_bits_set(simd8 bits) const { + return ~this->bits_not_set(bits); + } + simdjson_inline bool bits_not_set_anywhere() const { + return vec_all_eq(this->value, (__m128i)vec_splats(0)); + } + simdjson_inline bool any_bits_set_anywhere() const { + return !bits_not_set_anywhere(); + } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { + return vec_all_eq(vec_and(this->value, (__m128i)bits), + (__m128i)vec_splats(0)); + } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { + return
!bits_not_set_anywhere(bits); + } + template simdjson_inline simd8 shr() const { + return simd8( + (__m128i)vec_sr(this->value, (__m128i)vec_splat_u8(N))); + } + template simdjson_inline simd8 shl() const { + return simd8( + (__m128i)vec_sl(this->value, (__m128i)vec_splat_u8(N))); + } +}; + +template struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, + "PPC64 kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64 &o) = delete; // no copy allowed + simd8x64 & + operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, + const simd8 chunk2, const simd8 chunk3) + : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) + : chunks{simd8::load(ptr), simd8::load(ptr + 16), + simd8::load(ptr + 32), simd8::load(ptr + 48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr + sizeof(simd8) * 0); + this->chunks[1].store(ptr + sizeof(simd8) * 1); + this->chunks[2].store(ptr + sizeof(simd8) * 2); + this->chunks[3].store(ptr + sizeof(simd8) * 3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T *output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), + output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), + output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), + output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r1 = 
this->chunks[1].to_bitmask(); + uint64_t r2 = this->chunks[2].to_bitmask(); + uint64_t r3 = this->chunks[3].to_bitmask(); + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, + this->chunks[2] == mask, this->chunks[3] == mask) + .to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64(this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3]) + .to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, + this->chunks[2] <= mask, this->chunks[3] <= mask) + .to_bitmask(); + } +}; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_SIMD_H +/* end file simdjson/ppc64/simd.h */ +/* including simdjson/ppc64/stringparsing_defs.h: #include "simdjson/ppc64/stringparsing_defs.h" */ +/* begin file simdjson/ppc64/stringparsing_defs.h */ +#ifndef SIMDJSON_PPC64_STRINGPARSING_DEFS_H +#define SIMDJSON_PPC64_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/simd.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations.
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote + copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { + return ((bs_bits - 1) & quote_bits) != 0; + } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { + return trailing_zeroes(quote_bits); + } + simdjson_inline int backslash_index() { + return trailing_zeroes(bs_bits); + } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote +backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), + "backslash and quote finder must process fewer than " + "SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on + // PPC; therefore, we smash them together into a 64-byte mask and get the + // bitmask from there. 
+ uint64_t bs_and_quote = + simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_STRINGPARSING_DEFS_H +/* end file simdjson/ppc64/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/ppc64/begin.h */ +/* including simdjson/generic/ondemand/amalgamated.h for ppc64: #include "simdjson/generic/ondemand/amalgamated.h" */ +/* begin file simdjson/generic/ondemand/amalgamated.h for ppc64 */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H) +#error simdjson/generic/ondemand/dependencies.h must be included before simdjson/generic/ondemand/amalgamated.h! +#endif + +// Stuff other things depend on +/* including simdjson/generic/ondemand/base.h for ppc64: #include "simdjson/generic/ondemand/base.h" */ +/* begin file simdjson/generic/ondemand/base.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +/** + * A fast, simple, DOM-like interface that parses JSON as you use it. + * + * Designed for maximum speed and a lower memory profile. + */ +namespace ondemand { + +/** Represents the depth of a JSON value (number of nested arrays/objects). 
*/ +using depth_t = int32_t; + +/** @copydoc simdjson::ppc64::number_type */ +using number_type = simdjson::ppc64::number_type; + +/** @private Position in the JSON buffer indexes */ +using token_position = const uint32_t *; + +class array; +class array_iterator; +class document; +class document_reference; +class document_stream; +class field; +class json_iterator; +enum class json_type; +struct number; +class object; +class object_iterator; +class parser; +class raw_json_string; +class token_iterator; +class value; +class value_iterator; + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_BASE_H +/* end file simdjson/generic/ondemand/base.h for ppc64 */ +/* including simdjson/generic/ondemand/value_iterator.h for ppc64: #include "simdjson/generic/ondemand/value_iterator.h" */ +/* begin file simdjson/generic/ondemand/value_iterator.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +/** + * Iterates through a single JSON value at a particular depth. + * + * Does not keep track of the type of value: provides methods for objects, arrays and scalars and expects + * the caller to call the right ones. + * + * @private This is not intended for external use. 
+ */ +class value_iterator { +protected: + /** The underlying JSON iterator */ + json_iterator *_json_iter{}; + /** The depth of this value */ + depth_t _depth{}; + /** + * The starting token index for this value + */ + token_position _start_position{}; + +public: + simdjson_inline value_iterator() noexcept = default; + + /** + * Denote that we're starting a document. + */ + simdjson_inline void start_document() noexcept; + + /** + * Skips a non-iterated or partially-iterated JSON value, whether it is a scalar, array or object. + * + * Optimized for scalars. + */ + simdjson_warn_unused simdjson_inline error_code skip_child() noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is at the start of the value + */ + simdjson_inline bool at_start() const noexcept; + + /** + * Tell whether the value is open--if the value has not been used, or the array/object is still open. + */ + simdjson_inline bool is_open() const noexcept; + + /** + * Tell whether the value is at an object's first field (just after the {). + */ + simdjson_inline bool at_first_field() const noexcept; + + /** + * Abandon all iteration. + */ + simdjson_inline void abandon() noexcept; + + /** + * Get the child value as a value_iterator. + */ + simdjson_inline value_iterator child_value() const noexcept; + + /** + * Get the depth of this value. + */ + simdjson_inline int32_t depth() const noexcept; + + /** + * Get the JSON type of this value. + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() const noexcept; + + /** + * @addtogroup object Object iteration + * + * Methods to iterate and find object fields. These methods generally *assume* the value is + * actually an object; the caller is responsible for keeping track of that fact. + * + * @{ + */ + + /** + * Start an object iteration. 
+ * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + */ + simdjson_warn_unused simdjson_inline simdjson_result start_object() noexcept; + /** + * Start an object iteration from the root. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_object() noexcept; + /** + * Checks whether an object could be started from the root. May be called by start_root_object. + * + * @returns SUCCESS if it is possible to safely start an object from the root (document level). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_object() noexcept; + /** + * Start an object iteration after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; + /** + * Start an object iteration from the root, after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; + + /** + * Moves to the next field in an object. + * + * Looks for , and }. 
If } is found, the object is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return whether there is another field in the object. + * @error TAPE_ERROR If there is a comma missing between fields. + * @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_field() noexcept; + + /** + * Get the current field's key. + */ + simdjson_warn_unused simdjson_inline simdjson_result field_key() noexcept; + + /** + * Pass the : in the field and move to its value. + */ + simdjson_warn_unused simdjson_inline error_code field_value() noexcept; + + /** + * Find the next field with the given key. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline error_code find_field(const std::string_view key) noexcept; + + /** + * Find the next field with the given key, *without* unescaping. This assumes object order: it + * will not find the field if it was already passed when looking for some *other* field. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). 
+ */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_raw(const std::string_view key) noexcept; + + /** + * Find the field with the given key without regard to order, and *without* unescaping. + * + * This is an unordered object lookup: if the field is not found initially, it will cycle around and scan from the beginning. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept; + + /** @} */ + + /** + * @addtogroup array Array iteration + * Methods to iterate over array elements. These methods generally *assume* the value is actually + * an array; the caller is responsible for keeping track of that fact. + * @{ + */ + + /** + * Check for an opening [ and start an array iteration. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + */ + simdjson_warn_unused simdjson_inline simdjson_result start_array() noexcept; + /** + * Check for an opening [ and start an array iteration while at the root. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_array() noexcept; + /** + * Checks whether an array could be started from the root. May be called by start_root_array. + * + * @returns SUCCESS if it is possible to safely start an array from the root (document level).
+ * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_array() noexcept; + /** + * Start an array iteration, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; + /** + * Start an array iteration from the root, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; + + /** + * Moves to the next element in an array. + * + * Looks for , and ]. If ] is found, the array is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return Whether there is another element in the array. + * @error TAPE_ERROR If there is a comma missing between elements. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_element() noexcept; + + /** + * Get a child value iterator. 
+ */ + simdjson_warn_unused simdjson_inline value_iterator child() const noexcept; + + /** @} */ + + /** + * @defgroup scalar Scalar values + * @addtogroup scalar + * @{ + */ + + simdjson_warn_unused simdjson_inline simdjson_result get_string(bool allow_replacement) noexcept; + template + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_bool() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_null() noexcept; + simdjson_warn_unused simdjson_inline bool is_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_integer() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + simdjson_warn_unused simdjson_inline simdjson_result get_root_string(bool check_trailing, bool allow_replacement) noexcept; + template + simdjson_warn_unused simdjson_inline error_code get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_wobbly_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result 
get_root_raw_json_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_bool(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline bool is_root_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_integer(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number_type(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; + + simdjson_inline error_code error() const noexcept; + simdjson_inline uint8_t *&string_buf_loc() noexcept; + simdjson_inline const json_iterator &json_iter() const noexcept; + simdjson_inline json_iterator &json_iter() noexcept; + + simdjson_inline void assert_is_valid() const noexcept; + simdjson_inline bool is_valid() const noexcept; + + /** @} */ +protected: + /** + * Restarts an array iteration. + * @returns Whether the array has any elements (returns false for empty). + */ + simdjson_inline simdjson_result reset_array() noexcept; + /** + * Restarts an object iteration. + * @returns Whether the object has any fields (returns false for empty). 
+ */ + simdjson_inline simdjson_result reset_object() noexcept; + /** + * move_at_start(): moves us so that we are pointing at the beginning of + * the container. It updates the index so that at_start() is true and it + * syncs the depth. The user can then create a new container instance. + * + * Usage: used with value::count_elements(). + **/ + simdjson_inline void move_at_start() noexcept; + + /** + * move_at_container_start(): moves us so that we are pointing at the beginning of + * the container so that assert_at_container_start() passes. + * + * Usage: used with reset_array() and reset_object(). + **/ + simdjson_inline void move_at_container_start() noexcept; + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + simdjson_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept; + + simdjson_inline simdjson_result parse_null(const uint8_t *json) const noexcept; + simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; + simdjson_inline const uint8_t *peek_start() const noexcept; + simdjson_inline uint32_t peek_start_length() const noexcept; + simdjson_inline uint32_t peek_root_length() const noexcept; + + /** + * The general idea of the advance_... methods and the peek_* methods + * is that you first peek and check that you have the desired type. If you do, + * and only if you do, then you advance. + * + * We used to unconditionally advance. But this made reasoning about our + * current state difficult. + * Suppose you always advance. Look at the 'value' matching the key + * "shadowable" in the following example... + * + * ({"globals":{"a":{"shadowable":[}}}}) + * + * If the user thinks it is a Boolean and asks for it, then we check the '[', + * decide it is not a Boolean, but still move into the next character ('}'). Now + * we are left pointing at '}' right after a '['. And we have not yet reported + * an error, only that we do not have a Boolean.
+ * + * If, instead, you just stand your ground until it is content that you know, then + * you will only ever move beyond the '[' if the user tells you that you have an + * array. So you will be at the '}' character inside the array and, hopefully, you + * will then catch the error because an array cannot start with '}', but the code + * processing Boolean values does not know this. + * + * So the contract is: first call 'peek_...' and then call 'advance_...' only + * if you have determined that it is a type you can handle. + * + * Unfortunately, it makes the code more verbose, longer and maybe more error prone. + */ + + simdjson_inline void advance_scalar(const char *type) noexcept; + simdjson_inline void advance_root_scalar(const char *type) noexcept; + simdjson_inline void advance_non_root_scalar(const char *type) noexcept; + + simdjson_inline const uint8_t *peek_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_root_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; + + + simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; + simdjson_inline error_code end_container() noexcept; + + /** + * Advance to a place expecting a value (increasing depth). + * + * @return The current token (the one left behind). + * @error TAPE_ERROR If the document ended early. + */ + simdjson_inline simdjson_result advance_to_value() noexcept; + + simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; + simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; + + simdjson_inline bool is_at_start() const noexcept; + /** + * is_at_iterator_start() returns true on an array or object after it has just been + * created, whether the instance is empty or not.
+ * + * Usage: used by array::begin() in debug mode (SIMDJSON_DEVELOPMENT_CHECKS) + */ + simdjson_inline bool is_at_iterator_start() const noexcept; + + /** + * Assuming that we are within an object, this returns true if we + * are pointing at a key. + * + * Usage: the skip_child() method should never be used while we are pointing + * at a key inside an object. + */ + simdjson_inline bool is_at_key() const noexcept; + + inline void assert_at_start() const noexcept; + inline void assert_at_container_start() const noexcept; + inline void assert_at_root() const noexcept; + inline void assert_at_child() const noexcept; + inline void assert_at_next() const noexcept; + inline void assert_at_non_root_start() const noexcept; + + /** Get the starting position of this value */ + simdjson_inline token_position start_position() const noexcept; + + /** @copydoc error_code json_iterator::position() const noexcept; */ + simdjson_inline token_position position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position last_position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position end_position() const noexcept; + /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + friend class document; + friend class object; + friend class array; + friend class value; + friend class field; +}; // value_iterator + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(ppc64::ondemand::value_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline 
simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H +/* end file simdjson/generic/ondemand/value_iterator.h for ppc64 */ +/* including simdjson/generic/ondemand/value.h for ppc64: #include "simdjson/generic/ondemand/value.h" */ +/* begin file simdjson/generic/ondemand/value.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +/** + * An ephemeral JSON value returned during iteration. It is only valid for as long as you do + * not access more data in the JSON document. + */ +class value { +public: + /** + * Create a new invalid value. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline value() noexcept = default; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_inline simdjson_result get() noexcept { + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. 
+ static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_inline error_code get(T &out) noexcept; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() noexcept; + + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() noexcept; + + /** + * Cast this JSON value to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + + /** + * Cast this JSON value (inside string) to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer.
+ */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64() noexcept; + + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + + /** + * Attempts to fill the provided std::string reference with the parsed value of the current string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. 
+ * We recommend you avoid allocating an std::string unless you need to. + * + * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. + */ + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + + /** + * Cast this JSON value to a "wobbly" string. + * + * The string may not be a valid UTF-8 string. + * See https://simonsapin.github.io/wtf-8/ + * + * Important: a value should be consumed once. Calling get_wobbly_string() twice on the same value + * is an error. + * + * @returns A string that may not be valid UTF-8. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_inline simdjson_result get_bool() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null. + * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T.
The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.). + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() noexcept(false); + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed.
+ * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); +#endif + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + * + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * Performance hint: You should only call count_elements() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. 
+ * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method on the object instance. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. 
+ * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field was not there when they are not in order). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object.
+ */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @return The type of JSON value (json_type::array, json_type::object, json_type::string, + * json_type::number, json_type::boolean, or json_type::null). + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the value is a scalar (string, number, null, Boolean). + * Returns false when there it is an array or object. + * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + /** + * Checks whether the value is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; + + /** + * Checks whether the value is a negative number. 
+ * + * @returns true if the number is negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the value is an integer number. Note that + * this requires to partially parse the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * Performance note: if you call this function systematically + * before parsing a number, you may have fallen for a performance + * anti-pattern. + * + * @returns true if the number is an integer. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808. + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808. + * get_number_type() is number_type::big_integer for integers that do not fit in 64 bits, + * in which case the digit_count is set to the length of the big integer string. + * Otherwise, get_number_type() has value number_type::floating_point_number. + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number.
An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * For integers that do not fit in 64 bits, the function returns BIGINT_ERROR error code. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + * + * Performance note: this is designed with performance in mind. When + * calling 'get_number()', you scan the number string only once, determining + * efficiently the type and storing it in an efficient manner. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. 
+ * + * The string_view is *not* null-terminated. However, if this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view is guaranteed to be + * a non-space token. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". + * - -1.2e-100 + * - true + * - false + * - null + * + * See also value::raw_json(). + */ + simdjson_inline std::string_view raw_json_token() noexcept; + + /** + * Get a string_view pointing at this value in the JSON document. + * If this element is an array or an object, it consumes the array or the object + * and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document. + * If this element is a scalar (string, number, Boolean, null), it returns what + * raw_json_token() would return. + */ + simdjson_inline simdjson_result raw_json() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + simdjson_inline simdjson_result current_location() noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. 
+ * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. + * + * Calling at_pointer() on non-document instances (e.g., arrays and objects) is not + * standardized (by RFC 6901). We provide some experimental support for JSON pointers + * on non-document instances. Yet it is not the case when calling at_pointer on an array + * or an object instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. 
+ * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that are trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view at_path) noexcept; + + +protected: + /** + * Create a value. + */ + simdjson_inline value(const value_iterator &iter) noexcept; + + /** + * Skip this value, allowing iteration to continue. + */ + simdjson_inline void skip() noexcept; + + /** + * Start a value at the current position. + * + * (It should already be started; this is just a self-documentation method.) + */ + static simdjson_inline value start(const value_iterator &iter) noexcept; + + /** + * Resume a value.
+ */ + static simdjson_inline value resume(const value_iterator &iter) noexcept; + + /** + * Get the object, starting or resuming it as necessary + */ + simdjson_inline simdjson_result start_or_resume_object() noexcept; + + // simdjson_inline void log_value(const char *type) const noexcept; + // simdjson_inline void log_error(const char *message) const noexcept; + + value_iterator iter{}; + + friend class document; + friend class array_iterator; + friend class field; + friend class object; + friend struct simdjson_result; + friend struct simdjson_result; + friend class field; +}; + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(ppc64::ondemand::value &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result get_array() noexcept; + simdjson_inline simdjson_result get_object() noexcept; + + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() noexcept; + 
+ template simdjson_inline error_code get(T &out) noexcept; + +#if SIMDJSON_EXCEPTIONS + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator ppc64::ondemand::array() noexcept(false); + simdjson_inline operator ppc64::ondemand::object() noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator ppc64::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
+ */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field is not there when they are not in order). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object.
+ */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + */ + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + + /** @copydoc simdjson_inline std::string_view value::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + + /** @copydoc simdjson_inline simdjson_result current_location() noexcept */ + simdjson_inline simdjson_result current_location() noexcept; + /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + +} // namespace simdjson + +#endif 
// SIMDJSON_GENERIC_ONDEMAND_VALUE_H +/* end file simdjson/generic/ondemand/value.h for ppc64 */ +/* including simdjson/generic/ondemand/logger.h for ppc64: #include "simdjson/generic/ondemand/logger.h" */ +/* begin file simdjson/generic/ondemand/logger.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +// Logging should be free unless SIMDJSON_VERBOSE_LOGGING is set. Importantly, it is critical +// that the call to the log functions be side-effect free. Thus, for example, you should not +// create temporary std::string instances. +namespace logger { + +enum class log_level : int32_t { + info = 0, + error = 1 +}; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + +// We do not want these functions to be 'really inlined' since real inlining is +// for performance purposes and if you are using the loggers, you do not care about +// performance (or should not). +static inline void log_headers() noexcept; +// If args are provided, title will be treated as format string +template +static inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +template +static inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... 
args) noexcept; +static inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; + +static inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail="") noexcept; +static inline void log_error(const json_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +static inline void log_event(const value_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const value_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_error(const value_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +} // namespace logger +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_H +/* end file 
simdjson/generic/ondemand/logger.h for ppc64 */ +/* including simdjson/generic/ondemand/token_iterator.h for ppc64: #include "simdjson/generic/ondemand/token_iterator.h" */ +/* begin file simdjson/generic/ondemand/token_iterator.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +/** + * Iterates through JSON tokens (`{` `}` `[` `]` `,` `:` `""` `123` `true` `false` `null`) + * detected by stage 1. + * + * @private This is not intended for external use. + */ +class token_iterator { +public: + /** + * Create a new invalid token_iterator. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline token_iterator() noexcept = default; + simdjson_inline token_iterator(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator &operator=(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator(const token_iterator &other) noexcept = default; + simdjson_inline token_iterator &operator=(const token_iterator &other) noexcept = default; + + /** + * Advance to the next token (returning the current one). + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + /** + * Reports the current offset in bytes from the start of the underlying buffer. 
+ */ + simdjson_inline uint32_t current_offset() const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. + * + */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a root token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token (start of the document). + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; + /** + * Return the current index. + */ + simdjson_inline token_position position() const noexcept; + /** + * Reset to a previously saved index. 
+ */ + simdjson_inline void set_position(token_position target_position) noexcept; + + // NOTE: we don't support a full C++ iterator interface, because we expect people to make + // different calls to advance the iterator based on *their own* state. + + simdjson_inline bool operator==(const token_iterator &other) const noexcept; + simdjson_inline bool operator!=(const token_iterator &other) const noexcept; + simdjson_inline bool operator>(const token_iterator &other) const noexcept; + simdjson_inline bool operator>=(const token_iterator &other) const noexcept; + simdjson_inline bool operator<(const token_iterator &other) const noexcept; + simdjson_inline bool operator<=(const token_iterator &other) const noexcept; + +protected: + simdjson_inline token_iterator(const uint8_t *buf, token_position position) noexcept; + + /** + * Get the index of the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_index(int32_t delta=0) const noexcept; + /** + * Get the index of the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. + * + */ + simdjson_inline uint32_t peek_index(token_position position) const noexcept; + + const uint8_t *buf{}; + token_position _position{}; + + friend class json_iterator; + friend class value_iterator; + friend class object; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... 
args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +}; + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(ppc64::ondemand::token_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H +/* end file simdjson/generic/ondemand/token_iterator.h for ppc64 */ +/* including simdjson/generic/ondemand/json_iterator.h for ppc64: #include "simdjson/generic/ondemand/json_iterator.h" */ +/* begin file simdjson/generic/ondemand/json_iterator.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +/** + * Iterates through JSON tokens, keeping track of depth and string buffer. + * + * @private This is not intended for external use. 
+ */ +class json_iterator { +protected: + token_iterator token{}; + ondemand::parser *parser{}; + /** + * Next free location in the string buffer. + * + * Used by raw_json_string::unescape() to have a place to unescape strings to. + */ + uint8_t *_string_buf_loc{}; + /** + * JSON error, if there is one. + * + * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. + * + * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first + * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If + * this is not elided, we should make sure it's at least not using up a register. Failing that, + * we should store it in document so there's only one of them. + */ + error_code error{SUCCESS}; + /** + * Depth of the current token in the JSON. + * + * - 0 = finished with document + * - 1 = document root value (could be [ or {, not yet known) + * - 2 = , or } inside root array/object + * - 3 = key or value inside root array/object. + */ + depth_t _depth{}; + /** + * Beginning of the document indexes. + * Normally we have root == parser->implementation->structural_indexes.get() + * but this may differ, especially in streaming mode (where we have several + * documents); + */ + token_position _root{}; + /** + * Normally, a json_iterator operates over a single document, but in + * some cases, we may have a stream of documents. This attribute is meant + * as meta-data: the json_iterator works the same irrespective of the + * value of this attribute. 
+ */ + bool _streaming{false}; + +public: + simdjson_inline json_iterator() noexcept = default; + simdjson_inline json_iterator(json_iterator &&other) noexcept; + simdjson_inline json_iterator &operator=(json_iterator &&other) noexcept; + simdjson_inline explicit json_iterator(const json_iterator &other) noexcept = default; + simdjson_inline json_iterator &operator=(const json_iterator &other) noexcept = default; + /** + * Skips a JSON value, whether it is a scalar, array or object. + */ + simdjson_warn_unused simdjson_inline error_code skip_child(depth_t parent_depth) noexcept; + + /** + * Tell whether the iterator is still at the start + */ + simdjson_inline bool at_root() const noexcept; + + /** + * Tell whether we should be expected to run in streaming + * mode (iterating over many documents). It is pure metadata + * that does not affect how the iterator works. It is used by + * start_root_array() and start_root_object(). + */ + simdjson_inline bool streaming() const noexcept; + + /** + * Get the root value iterator + */ + simdjson_inline token_position root_position() const noexcept; + /** + * Assert that we are at the document depth (== 1) + */ + simdjson_inline void assert_at_document_depth() const noexcept; + /** + * Assert that we are at the root of the document + */ + simdjson_inline void assert_at_root() const noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is live (has not been moved). + */ + simdjson_inline bool is_alive() const noexcept; + + /** + * Abandon this iterator, setting depth to 0 (as if the document is finished). + */ + simdjson_inline void abandon() noexcept; + + /** + * Advance the current token without modifying depth. + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + + /** + * Returns true if there is a single token in the index (i.e., it is + * a JSON with a scalar value such as a single number). 
+ * + * @return whether there is a single token + */ + simdjson_inline bool is_single_token() const noexcept; + + /** + * Assert that there are at least the given number of tokens left. + * + * Has no effect in release builds. + */ + simdjson_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; + /** + * Assert that the given position addresses an actual token (is within bounds). + * + * Has no effect in release builds. + */ + simdjson_inline void assert_valid_position(token_position position) const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + /** + * Get a pointer to the current location in the input buffer. + * + * This is not null-terminated; it is a view into the JSON. + * + * You may be pointing outside of the input buffer: it is not generally + * safe to dereference this pointer. + */ + simdjson_inline const uint8_t *unsafe_pointer() const noexcept; + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token to retrieve. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ... 
+ */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the current root token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; + /** + * Get the JSON text for the last token in the document. + * + * This is not null-terminated; it is a view into the JSON. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ... + */ + simdjson_inline const uint8_t *peek_last() const noexcept; + + /** + * Ascend one level. + * + * Validates that the depth - 1 == parent_depth. + * + * @param parent_depth the expected parent depth. + */ + simdjson_inline void ascend_to(depth_t parent_depth) noexcept; + + /** + * Descend one level. + * + * Validates that the new depth == child_depth. + * + * @param child_depth the expected child depth. + */ + simdjson_inline void descend_to(depth_t child_depth) noexcept; + simdjson_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; + + /** + * Get current depth. + */ + simdjson_inline depth_t depth() const noexcept; + + /** + * Get current (writeable) location in the string buffer. + */ + simdjson_inline uint8_t *&string_buf_loc() noexcept; + + /** + * Report an unrecoverable error, preventing further iteration. + * + * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. 
+ */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + /** + * Log error, but don't stop iteration. + * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. + */ + simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; + + /** + * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with + * N bytes of capacity. It will return false if N is too small (smaller than max_len) or if it is zero. + * The buffer (tmpbuf) is padded with space characters. + */ + simdjson_warn_unused simdjson_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept; + + simdjson_inline token_position position() const noexcept; + /** + * Write the raw_json_string to the string buffer and return a string_view. + * Each raw_json_string should be unescaped once, or else the string buffer might + * overflow. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, bool allow_replacement) noexcept; + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in) noexcept; + + simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; + + simdjson_inline error_code consume_character(char c) noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + simdjson_inline token_position start_position(depth_t depth) const noexcept; + simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; +#endif + + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() const noexcept; + + /** + * Updates this json iterator so that it is back at the beginning of the document, + * as if it had just been created.
+ */ + inline void rewind() noexcept; + /** + * This checks whether the {,},[,] are balanced so that the document + * ends with proper zero depth. This requires scanning the whole document + * and it may be expensive. It is expected that it will be rarely called. + * It does not attempt to match { with } and [ with ]. + */ + inline bool balanced() const noexcept; +protected: + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser, bool streaming) noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + /// The last token before the end + simdjson_inline token_position last_position() const noexcept; + /// The token *at* the end. This points at gibberish and should only be used for comparison. + simdjson_inline token_position end_position() const noexcept; + /// The end of the buffer. + simdjson_inline token_position end() const noexcept; + + friend class document; + friend class document_stream; + friend class object; + friend class array; + friend class value; + friend class raw_json_string; + friend class parser; + friend class value_iterator; + friend class field; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... 
args) noexcept; +}; // json_iterator + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(ppc64::ondemand::json_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H +/* end file simdjson/generic/ondemand/json_iterator.h for ppc64 */ +/* including simdjson/generic/ondemand/json_type.h for ppc64: #include "simdjson/generic/ondemand/json_type.h" */ +/* begin file simdjson/generic/ondemand/json_type.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +/** + * The type of a JSON value. + */ +enum class json_type { + // Start at 1 to catch uninitialized / default values more easily + array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) + object, ///< A JSON object ( { "a": 1, "b": 2, ... } ) + number, ///< A JSON number ( 1 or -2.3 or 4.5e6 ...) + string, ///< A JSON string ( "a" or "hello world\n" ...) + boolean, ///< A JSON boolean (true or false) + null ///< A JSON null (null) +}; + +/** + * A type representing a JSON number. + * The design of the struct is deliberately straight-forward.
All + * functions return standard values with no error check. + */ +struct number { + + /** + * return the automatically determined type of + * the number: number_type::floating_point_number, + * number_type::signed_integer or number_type::unsigned_integer. + * + * enum class number_type { + * floating_point_number=1, /// a binary64 number + * signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + * unsigned_integer /// a positive integer larger or equal to 1<<63 + * }; + */ + simdjson_inline ondemand::number_type get_number_type() const noexcept; + /** + * return true if the automatically determined type of + * the number is number_type::unsigned_integer. + */ + simdjson_inline bool is_uint64() const noexcept; + /** + * return the value as a uint64_t, only valid if is_uint64() is true. + */ + simdjson_inline uint64_t get_uint64() const noexcept; + simdjson_inline operator uint64_t() const noexcept; + + /** + * return true if the automatically determined type of + * the number is number_type::signed_integer. + */ + simdjson_inline bool is_int64() const noexcept; + /** + * return the value as an int64_t, only valid if is_int64() is true. + */ + simdjson_inline int64_t get_int64() const noexcept; + simdjson_inline operator int64_t() const noexcept; + + + /** + * return true if the automatically determined type of + * the number is number_type::floating_point_number. + */ + simdjson_inline bool is_double() const noexcept; + /** + * return the value as a double, only valid if is_double() is true. + */ + simdjson_inline double get_double() const noexcept; + simdjson_inline operator double() const noexcept; + + /** + * Convert the number to a double. Though it always succeeds, the conversion + * may be lossy if the number cannot be represented exactly.
+ */ + simdjson_inline double as_double() const noexcept; + + +protected: + /** + * The next block of declarations is designed so that we can call the number parsing + * functions on a number type. They are protected and should never be used outside + * of the core simdjson library. + */ + friend class value_iterator; + template + friend error_code numberparsing::write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer); + template + friend error_code numberparsing::parse_number(const uint8_t *const src, W &writer); + /** Store a signed 64-bit value to the number. */ + simdjson_inline void append_s64(int64_t value) noexcept; + /** Store an unsigned 64-bit value to the number. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + /** Store a double value to the number. */ + simdjson_inline void append_double(double value) noexcept; + /** Specifies that the value is a double, but leaves it undefined. */ + simdjson_inline void skip_double() noexcept; + /** + * End of friend declarations. + */ + + /** + * Our attributes are a union type (size = 64 bits) + * followed by a type indicator. + */ + union { + double floating_point_number; + int64_t signed_integer; + uint64_t unsigned_integer; + } payload{0}; + number_type type{number_type::signed_integer}; +}; + +/** + * Write the JSON type to the output stream + * + * @param out The output stream. + * @param type The json_type. + */ +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept; + +#if SIMDJSON_EXCEPTIONS +/** + * Send JSON type to an output stream. + * + * @param out The output stream. + * @param type The json_type. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown).
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false); +#endif + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(ppc64::ondemand::json_type &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H +/* end file simdjson/generic/ondemand/json_type.h for ppc64 */ +/* including simdjson/generic/ondemand/raw_json_string.h for ppc64: #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +/** + * A string escaped per JSON rules, terminated with quote ("). They are used to represent + * unescaped keys inside JSON documents. + * + * (In other words, a pointer to the beginning of a string, just after the start quote, inside a + * JSON file.) + * + * This class is deliberately simplistic and has little functionality. You can + * compare a raw_json_string instance with an unescaped C string, but + * that is nearly all you can do. 
+ * + * The raw_json_string is unescaped. If you wish to write an unescaped version of it to your own + * buffer, you may do so using the parser.unescape(string, buff) method, using an ondemand::parser + * instance. Doing so requires you to have a sufficiently large buffer. + * + * The raw_json_string instances originate typically from field instance which in turn represent + * key-value pairs from object instances. From a field instance, you get the raw_json_string + * instance by calling key(). You can, if you want a more usable string_view instance, call + * the unescaped_key() method on the field instance. You may also create a raw_json_string from + * any other string value, with the value.get_raw_json_string() method. Again, you can get + * a more usable string_view instance by calling get_string(). + * + */ +class raw_json_string { +public: + /** + * Create a new invalid raw_json_string. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline raw_json_string() noexcept = default; + + /** + * Create a new invalid raw_json_string pointed at the given location in the JSON. + * + * The given location must be just *after* the beginning quote (") in the JSON file. + * + * It *must* be terminated by a ", and be a valid JSON string. + */ + simdjson_inline raw_json_string(const uint8_t * _buf) noexcept; + /** + * Get the raw pointer to the beginning of the string in the JSON (just after the "). + * + * It is possible for this function to return a null pointer if the instance + * has outlived its existence. + */ + simdjson_inline const char * raw() const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done) on target.size() characters, + * and if the raw_json_string instance has a quote character at byte index target.size(). + * We never read more than length + 1 bytes in the raw_json_string instance. 
+ * If length is smaller than target.size(), this will return false. + * + * The std::string_view instance may contain any characters. However, the caller + * is responsible for setting length so that length bytes may be read in the + * raw_json_string. + * + * Performance: the comparison may be done using memcmp which may be efficient + * for long strings. + */ + simdjson_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The std::string_view instance should not contain unescaped quote characters: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * Performance: the comparison is done byte-by-byte which might be inefficient for + * long strings. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... + * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The provided C string should not contain an unescaped quote character: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... 
+ * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(const char* target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(const char* target) const noexcept; + + /** + * Returns true if target is free from unescaped quote. If target is known at + * compile-time, we might expect the computation to happen at compile time with + * many compilers (not all!). + */ + static simdjson_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; + static simdjson_inline bool is_free_from_unescaped_quote(const char* target) noexcept; + +private: + + + /** + * This will set the inner pointer to zero, effectively making + * this instance unusable. + */ + simdjson_inline void consume() noexcept { buf = nullptr; } + + /** + * Checks whether the inner pointer is non-null and thus usable. + */ + simdjson_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result will be a valid UTF-8. + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + * @param allow_replacement Whether we allow replacement of invalid surrogate pairs. 
+ */ + simdjson_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter, bool allow_replacement) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result may not be a valid UTF-8. https://simonsapin.github.io/wtf-8/ + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + */ + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(json_iterator &iter) const noexcept; + const uint8_t * buf{}; + friend class object; + friend class field; + friend class parser; + friend struct simdjson_result; +}; + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; + +/** + * Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible + * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings. 
+ */ +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept; +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept; + + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(ppc64::ondemand::raw_json_string &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private + + simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape(ppc64::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(ppc64::ondemand::json_iterator &iter) const noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H +/* end file simdjson/generic/ondemand/raw_json_string.h for ppc64 */ +/* including simdjson/generic/ondemand/parser.h for ppc64: #include "simdjson/generic/ondemand/parser.h" */ +/* begin file simdjson/generic/ondemand/parser.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* 
amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +/** + * The default batch size for document_stream instances for this On Demand kernel. + * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * in the future. + */ +static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; +/** + * Some adversary might try to set the batch size to 0 or 1, which might cause problems. + * We set a minimum of 32B since anything else is highly likely to be an error. In practice, + * most users will want a much larger batch size. + * + * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON + * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. + */ +static constexpr size_t MINIMAL_BATCH_SIZE = 32; + +/** + * A JSON fragment iterator. + * + * This holds the actual iterator as well as the buffer for writing strings. + */ +class parser { +public: + /** + * Create a JSON parser. + * + * The new parser will have zero capacity. + */ + inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; + + inline parser(parser &&other) noexcept = default; + simdjson_inline parser(const parser &other) = delete; + simdjson_inline parser &operator=(const parser &other) = delete; + simdjson_inline parser &operator=(parser &&other) noexcept = default; + + /** Deallocate the JSON parser. */ + inline ~parser() noexcept = default; + + /** + * Start iterating an on-demand JSON document. + * + * ondemand::parser parser; + * document doc = parser.iterate(json); + * + * It is expected that the content is a valid UTF-8 file, containing a valid JSON document. + * Otherwise the iterate method may return an error. In particular, the whole input should be + * valid: we do not attempt to tolerate incorrect content either before or after a JSON + * document. 
If there is a UTF-8 BOM, the parser skips it. + * + * ### IMPORTANT: Validate what you use + * + * Calling iterate on an invalid JSON document may not immediately trigger an error. The call to + * iterate does not parse and validate the whole document. + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call parse() again or destroy the parser. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * @param len The length of the JSON. + * @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). + * + * @return The document, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document.
+ */ + simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_result iterate_allow_incomplete_json(padded_string_view json) & noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const uint8_t *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(padded_string &&json) & noexcept = delete; + + /** + * @private + * + * Start iterating an on-demand JSON document. 
+ * + * ondemand::parser parser; + * json_iterator doc = parser.iterate_raw(json); + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call parse() again or destroy the parser. + * + * The ondemand::document instance holds the iterator. The document must remain in scope + * while you are accessing instances of ondemand::value, ondemand::object, ondemand::array. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * + * @return The iterator, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document. + */ + simdjson_warn_unused simdjson_result iterate_raw(padded_string_view json) & noexcept; + + + /** + * Parse a buffer containing many JSON documents.
+ * + * auto json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"_padded; + * ondemand::parser parser; + * ondemand::document_stream docs = parser.iterate_many(json); + * for (auto & doc : docs) { + * std::cout << doc["foo"] << std::endl; + * } + * // Prints 1 2 3 + * + * No copy of the input buffer is made. + * + * The function is lazy: it may be that no more than one JSON document at a time is parsed. + * + * The caller is responsible for ensuring that the input string data remains unchanged and is + * not deleted during the loop. + * + * ### Format + * + * The buffer must contain a series of one or more JSON documents, concatenated into a single + * buffer, separated by ASCII whitespace. It effectively parses until it has a fully valid document, + * then starts parsing the next document at that point. (It does this with more parallelism and + * lookahead than you might think, though.) + * + * Documents that consist of an object or array may omit the whitespace between them, concatenating + * with no separator. Documents that consist of a single primitive (i.e. documents that are not + * arrays or objects) MUST be separated with ASCII whitespace. + * + * The characters inside a JSON document, and between JSON documents, must be valid Unicode (UTF-8). + * If there is a UTF-8 BOM, the parser skips it. + * + * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. + * Setting batch_size to excessively large or excessively small values may negatively impact the + * performance. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings.
+ * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * + * ### Parser Capacity + * + * If the parser's current capacity is less than batch_size, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param buf The concatenated JSON to parse. + * @param len The length of the concatenated JSON. + * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet + * spot is cache-related: small enough to fit in cache, yet big enough to + * parse as many documents as possible in one tight loop. + * Defaults to 1MB (DEFAULT_BATCH_SIZE), which has been a reasonable sweet spot in our tests. + * @param allow_comma_separated (defaults to false) This allows a mode where the documents are + * separated by commas instead of whitespace. It comes with a performance + * penalty because the entire document is indexed at once (and the document must be + * less than 4 GB), and there is no multithreading. In this mode, the batch_size parameter + * is effectively ignored, as it is set to at least the document size. + * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: + * - MEMALLOC if the parser does not have enough capacity and memory allocation fails + * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. + * - other json errors if parsing fails. You should not rely on these errors to always be the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
+ */ + inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) */ + inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) */ + inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) */ + inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + + /** @private We do not want to allow implicit conversion from C string to std::string. */ + simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; + + /** The capacity of this parser (the largest document it can process). */ + simdjson_inline size_t capacity() const noexcept; + /** The maximum capacity of this parser (the largest document it is allowed to process). */ + simdjson_inline size_t max_capacity() const noexcept; + simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; + /** + * The maximum depth of this parser (the most deeply nested objects and arrays it can process). + * This parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired.
+ */ + simdjson_inline size_t max_depth() const noexcept; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * The max_depth parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return The error, if there is one. + */ + simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept; + + #ifdef SIMDJSON_THREADS_ENABLED + /** + * The parser instance can use threads when they are available to speed up some + * operations. It is enabled by default. Changing this attribute will change the + * behavior of the parser for future operations. + */ + bool threaded{true}; + #endif + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result must be valid UTF-8. + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. 
+ * @param allow_replacement Whether we allow a replacement if the input string contains unmatched surrogate pairs. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement = false) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result may not be valid UTF-8. See https://simonsapin.github.io/wtf-8/ + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. 
+ */ + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; + +private: + /** @private [for benchmarking access] The implementation to use */ + std::unique_ptr implementation{}; + size_t _capacity{0}; + size_t _max_capacity; + size_t _max_depth{DEFAULT_MAX_DEPTH}; + std::unique_ptr string_buf{}; +#if SIMDJSON_DEVELOPMENT_CHECKS + std::unique_ptr start_positions{}; +#endif + + friend class json_iterator; + friend class document_stream; +}; + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(ppc64::ondemand::parser &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H +/* end file simdjson/generic/ondemand/parser.h for ppc64 */ + +// All other declarations +/* including simdjson/generic/ondemand/array.h for ppc64: #include "simdjson/generic/ondemand/array.h" */ +/* begin file simdjson/generic/ondemand/array.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +/** + * A forward-only JSON array. 
+ */ +class array { +public: + /** + * Create a new invalid array. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline array() noexcept = default; + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an array is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the beginning of the array and checks whether the + * array is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result is_empty() & noexcept; + /** + * Reset the iterator so that we are pointing back at the + * beginning of the array. You should still consume values only once even if you + * can iterate through the array more than once. If you unescape a string + * within the array more than once, you have unsafe code. 
Note that rewinding + * an array means that you may need to reparse it anew: it is not a free + * operation. + * + * @returns true if the array contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/0/foo/a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an array + * instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. 
We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Consumes the array and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + + /** + * Get the value at the given index. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; +protected: + /** + * Go to the end of the array, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + + /** + * Begin array iteration. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + */ + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + /** + * Begin array iteration from the root. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + * @error TAPE_ERROR if there is no closing ] at the end of the document. 
+ */ + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + /** + * Begin array iteration. + * + * This version of the method should be called after the initial [ has been verified, and is + * intended for use by switch statements that check the type of a value. + * + * @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array. + */ + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + + /** + * Create an array from the given iterator. Internal array creation: call array::start() or array::started() instead of this. + * + * @param iter The iterator. Must either be at the start of the first element with iter.is_alive() + * == true, or past the [] with is_alive() == false if the array is empty. Will be *moved* + * into the resulting array. + */ + simdjson_inline array(const value_iterator &iter) noexcept; + + /** + * Iterator marking current position. + * + * iter.is_alive() == false indicates iteration is complete.
+ */ + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; + friend struct simdjson_result; + friend class array_iterator; +}; + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(ppc64::ondemand::array &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + inline simdjson_result count_elements() & noexcept; + inline simdjson_result is_empty() & noexcept; + inline simdjson_result reset() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_H +/* end file simdjson/generic/ondemand/array.h for ppc64 */ +/* including simdjson/generic/ondemand/array_iterator.h for ppc64: #include "simdjson/generic/ondemand/array_iterator.h" */ +/* begin file simdjson/generic/ondemand/array_iterator.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* 
amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +/** + * A forward-only iterator over a JSON array. + * + * This is an input_iterator, meaning: + * - It is forward-only + * - * must be called exactly once per element. + * - ++ must be called exactly once in between each * (*, ++, *, ++, * ...) + */ +class array_iterator { +public: + /** Create a new, invalid array iterator. */ + simdjson_inline array_iterator() noexcept = default; + + // + // Iterator interface + // + + /** + * Get the current element. + * + * Part of the std::iterator interface. + */ + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + /** + * Check if we are at the end of the JSON. + * + * Part of the std::iterator interface. + * + * @return true if there are no more elements in the JSON array. + */ + simdjson_inline bool operator==(const array_iterator &) const noexcept; + /** + * Check if there are more elements in the JSON array. + * + * Part of the std::iterator interface. + * + * @return true if there are more elements in the JSON array. + */ + simdjson_inline bool operator!=(const array_iterator &) const noexcept; + /** + * Move to the next element. + * + * Part of the std::iterator interface.
+ */ + simdjson_inline array_iterator &operator++() noexcept; + +private: + value_iterator iter{}; + + simdjson_inline array_iterator(const value_iterator &iter) noexcept; + + friend class array; + friend class value; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(ppc64::ondemand::array_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H +/* end file simdjson/generic/ondemand/array_iterator.h for ppc64 */ +/* including simdjson/generic/ondemand/document.h for ppc64: #include "simdjson/generic/ondemand/document.h" */ +/* begin file simdjson/generic/ondemand/document.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +/** + * A JSON document. It holds a json_iterator instance. 
+ * + * Used by tokens to get text, and string buffer location. + * + * You must keep the document around during iteration. + */ +class document { +public: + /** + * Create a new invalid document. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline document() noexcept = default; + simdjson_inline document(const document &other) noexcept = delete; // pass your documents by reference, not by copy + simdjson_inline document(document &&other) noexcept = default; + simdjson_inline document &operator=(const document &other) noexcept = delete; + simdjson_inline document &operator=(document &&other) noexcept = default; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() & noexcept; + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() & noexcept; + /** + * Cast this JSON value to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + /** + * Cast this JSON value (inside string) to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64() noexcept; + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer.
+ * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: Calling get_string() twice on the same document is an error. + * + * @param allow_replacement Whether to allow a replacement character for unmatched surrogate pairs. + * @returns A UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + /** + * Attempts to fill the provided std::string reference with the parsed value of the current string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. + * We recommend you avoid allocating an std::string unless you need to. + * + * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. + */ + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + /** + * Cast this JSON value to a string. + * + * The string is not guaranteed to be valid UTF-8.
See https://simonsapin.github.io/wtf-8/ + * + * Important: Calling get_wobbly_string() twice on the same document is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_inline simdjson_result get_bool() noexcept; + /** + * Cast this JSON value to a value when the document is an object or an array. + * + * You must not have begun iterating through the object or array. When + * SIMDJSON_DEVELOPMENT_CHECKS is set to 1 (which is the case when building in Debug mode + * by default), and you have already begun iterating, + * you will get an OUT_OF_ORDER_ITERATION error. If you have begun iterating, you can use + * rewind() to reset the document to its initial state before calling this method. + * + * @returns A value if the document is a JSON array or object. + * @returns SCALAR_DOCUMENT_AS_VALUE error if the document is a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result get_value() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null.
+ * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_inline simdjson_result get() & noexcept { + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + } + /** @overload template simdjson_result get() & noexcept */ + template simdjson_inline simdjson_result get() && noexcept { + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." 
+ " You may also add support for custom types, see our documentation."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value + * + * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_inline error_code get(T &out) & noexcept; + /** @overload template error_code get(T &out) & noexcept */ + template simdjson_inline error_code get(T &out) && noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.) + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() noexcept(false); + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() & noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() & noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns An unsigned 64-bit integer.
+ * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); + /** + * Cast this JSON value to a value when the document is an object or an array. + * + * You must not have begun iterating through the object or array. When + * SIMDJSON_DEVELOPMENT_CHECKS is defined, and you have already begun iterating, + * you will get an OUT_OF_ORDER_ITERATION error. 
If you have begun iterating, you can use + * rewind() to reset the document to its initial state before calling this method. + * + * @returns A value if the document is a JSON array or object. + * @exception SCALAR_DOCUMENT_AS_VALUE error if the document is a scalar (see is_scalar() function). + */ + simdjson_inline operator value() noexcept(false); +#endif + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call.
+ * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) & noexcept; + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to + * a key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object.
+ */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field was not there when they are not in order). + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. 
+ * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the document is a scalar (string, number, null, Boolean). + * Returns false when it is an array or object. + * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + + /** + * Checks whether the document is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; + + /** + * Checks whether the document is a negative number.
+ * + * @returns true if the number is negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the document is an integer number. Note that + * this requires partially parsing the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * @returns true if the number is an integer. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808 and no larger than 18446744073709551615. + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808 and greater or equal to -9223372036854775808. + * get_number_type() is number_type::big_integer if we have an integer outside + * of those ranges (either larger than 18446744073709551615 or smaller than -9223372036854775808). + * Otherwise, get_number_type() has value number_type::floating_point_number + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number.
An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. If this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view may be the padded buffer. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". 
+ * - -1.2e-100 + * - true + * - false + * - null + */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + /** + * Reset the iterator inside the document instance so we are pointing back at the + * beginning of the document, as if it had just been created. It invalidates all + * values, objects and arrays that you have created so far (including unescaped strings). + */ + inline void rewind() noexcept; + /** + * Returns debugging information. + */ + inline std::string to_debug_string() noexcept; + /** + * Some unrecoverable error conditions may render the document instance unusable. + * The is_alive() method returns true when the document is still suitable. + */ + inline bool is_alive() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() const noexcept; + + /** + * Returns true if this document has been fully parsed. + * If you have consumed the whole document and at_end() returns + * false, then there may be trailing content. + */ + inline bool at_end() const noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. 
+ * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/\\u00E9") == 123 + * doc.at_pointer((const char*)u8"/\u00E9") returns an error (NO_SUCH_FIELD) + * + * Note that at_pointer() automatically calls rewind between each call. Thus + * all values, objects and arrays that you have created so far (including unescaped strings) + * are invalidated. After calling at_pointer, you need to consume the result: string values + * should be stored in your own variables, arrays should be decoded and stored in your own array-like + * structures and so forth. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. 
We only support + * JSONPath queries that are trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_path(".\\u00E9") == 123 + * doc.at_path((const char*)u8".\u00E9") returns an error (NO_SUCH_FIELD) + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Consumes the document and returns a string_view instance corresponding to the + * document as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; +protected: + /** + * Consumes the document.
+ */ + simdjson_inline error_code consume() noexcept; + + simdjson_inline document(ondemand::json_iterator &&iter) noexcept; + simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; + + simdjson_inline value_iterator resume_value_iterator() noexcept; + simdjson_inline value_iterator get_root_value_iterator() noexcept; + simdjson_inline simdjson_result start_or_resume_object() noexcept; + static simdjson_inline document start(ondemand::json_iterator &&iter) noexcept; + + // + // Fields + // + json_iterator iter{}; ///< Current position in the document + static constexpr depth_t DOCUMENT_DEPTH = 0; ///< document depth is always 0 + + friend class array_iterator; + friend class value; + friend class ondemand::parser; + friend class object; + friend class array; + friend class field; + friend class token; + friend class document_stream; + friend class document_reference; +}; + + +/** + * A document_reference is a thin wrapper around a document reference instance. + */ +class document_reference { +public: + simdjson_inline document_reference() noexcept; + simdjson_inline document_reference(document &d) noexcept; + simdjson_inline document_reference(const document_reference &other) noexcept = default; + simdjson_inline document_reference& operator=(const document_reference &other) noexcept = default; + simdjson_inline void rewind() noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code 
get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + + simdjson_inline simdjson_result is_null() noexcept; + template simdjson_inline simdjson_result get() & noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + simdjson_inline operator document&() const noexcept; +#if SIMDJSON_EXCEPTIONS + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator array() & noexcept(false); + simdjson_inline operator object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + + simdjson_inline simdjson_result type() noexcept; + simdjson_inline 
simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + +private: + document *doc{nullptr}; +}; +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(ppc64::ondemand::document &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() 
noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() & noexcept; + template simdjson_inline simdjson_result get() && noexcept; + + template simdjson_inline error_code get(T &out) & noexcept; + template simdjson_inline error_code get(T &out) && noexcept; +#if SIMDJSON_EXCEPTIONS + template ::value == false>::type> + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator ppc64::ondemand::array() & noexcept(false); + simdjson_inline operator ppc64::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator ppc64::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator ppc64::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; 
+ simdjson_inline simdjson_result is_string() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool at_end() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + + +} // namespace simdjson + + + +namespace simdjson { + +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(ppc64::ondemand::document_reference value, error_code error) noexcept; + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result 
get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; +#if SIMDJSON_EXCEPTIONS + template ::value == false>::type> + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator ppc64::ondemand::array() & noexcept(false); + simdjson_inline operator ppc64::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator ppc64::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator ppc64::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() 
noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H +/* end file simdjson/generic/ondemand/document.h for ppc64 */ +/* including simdjson/generic/ondemand/document_stream.h for ppc64: #include "simdjson/generic/ondemand/document_stream.h" */ +/* begin file simdjson/generic/ondemand/document_stream.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#ifdef SIMDJSON_THREADS_ENABLED +#include +#include +#include +#endif + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED +/** @private Custom worker class **/ +struct stage1_worker { + stage1_worker() noexcept = default; + stage1_worker(const stage1_worker&) = delete; + stage1_worker(stage1_worker&&) = delete; + stage1_worker operator=(const stage1_worker&) = delete; + ~stage1_worker(); + /** + * We only start the thread when it is needed, not at 
object construction, this may throw. + * You should only call this once. + **/ + void start_thread(); + /** + * Start a stage 1 job. You should first call 'run', then 'finish'. + * You must call start_thread once before. + */ + void run(document_stream * ds, parser * stage1, size_t next_batch_start); + /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ + void finish(); + +private: + + /** + * Normally, we would never stop the thread. But we do in the destructor. + * This function is only safe assuming that you are not waiting for results. You + * should have called run, then finish, and be done. + **/ + void stop_thread(); + + std::thread thread{}; + /** These three variables define the work done by the thread. **/ + ondemand::parser * stage1_thread_parser{}; + size_t _next_batch_start{}; + document_stream * owner{}; + /** + * We have two state variables. This could be streamlined to one variable in the future but + * we use two for clarity. + */ + bool has_work{false}; + bool can_work{true}; + + /** + * We lock using a mutex. + */ + std::mutex locking_mutex{}; + std::condition_variable cond_var{}; + + friend class document_stream; +}; +#endif // SIMDJSON_THREADS_ENABLED + +/** + * A forward-only stream of documents. + * + * Produced by parser::iterate_many. + * + */ +class document_stream { +public: + /** + * Construct an uninitialized document_stream. + * + * ```c++ + * document_stream docs; + * auto error = parser.iterate_many(json).get(docs); + * ``` + */ + simdjson_inline document_stream() noexcept; + /** Move one document_stream to another. */ + simdjson_inline document_stream(document_stream &&other) noexcept = default; + /** Move one document_stream to another. */ + simdjson_inline document_stream &operator=(document_stream &&other) noexcept = default; + + simdjson_inline ~document_stream() noexcept; + + /** + * Returns the input size in bytes. 
+ */ + inline size_t size_in_bytes() const noexcept; + + /** + * After iterating through the stream, this method + * returns the number of bytes that were not parsed at the end + * of the stream. If truncated_bytes() differs from zero, + * then the input was truncated maybe because incomplete JSON + * documents were found at the end of the stream. You + * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). + * + * You should only call truncated_bytes() after streaming through all + * documents, like so: + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto & doc : stream) { + * // do something with doc + * } + * size_t truncated = stream.truncated_bytes(); + * + */ + inline size_t truncated_bytes() const noexcept; + + class iterator { + public: + using value_type = simdjson_result; + using reference = simdjson_result; + using pointer = void; + using difference_type = std::ptrdiff_t; + using iterator_category = std::input_iterator_tag; + + /** + * Default constructor. + */ + simdjson_inline iterator() noexcept; + /** + * Get the current document (or error). + */ + simdjson_inline reference operator*() noexcept; + /** + * Advance to the next document (prefix). + */ + inline iterator& operator++() noexcept; + /** + * Check if we're at the end yet. + * @param other the end iterator to compare to. + */ + simdjson_inline bool operator!=(const iterator &other) const noexcept; + /** + * @private + * + * Gives the current index in the input document in bytes. + * + * document_stream stream = parser.parse_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * auto doc = *i; + * size_t index = i.current_index(); + * } + * + * This function (current_index()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. 
+ */ + simdjson_inline size_t current_index() const noexcept; + + /** + * @private + * + * Gives a view of the current document at the current position. + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * std::string_view v = i.source(); + * } + * + * The returned string_view instance is simply a map to the (unparsed) + * source string: it may thus include white-space characters and all manner + * of padding. + * + * This function (source()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + * + */ + simdjson_inline std::string_view source() const noexcept; + + /** + * Returns error of the stream (if any). + */ + inline error_code error() const noexcept; + + private: + simdjson_inline iterator(document_stream *s, bool finished) noexcept; + /** The document_stream we're iterating through. */ + document_stream* stream; + /** Whether we're finished or not. */ + bool finished; + + friend class document; + friend class document_stream; + friend class json_iterator; + }; + + /** + * Start iterating the documents in the stream. + */ + simdjson_inline iterator begin() noexcept; + /** + * The end of the stream, for iterator comparison purposes. + */ + simdjson_inline iterator end() noexcept; + +private: + + document_stream &operator=(const document_stream &) = delete; // Disallow copying + document_stream(const document_stream &other) = delete; // Disallow copying + + /** + * Construct a document_stream. Does not allocate or parse anything until the iterator is + * used. 
+ * + * @param parser is a reference to the parser instance used to generate this document_stream + * @param buf is the raw byte buffer we need to process + * @param len is the length of the raw byte buffer in bytes + * @param batch_size is the size of the windows (must be strictly greater or equal to the largest JSON document) + */ + simdjson_inline document_stream( + ondemand::parser &parser, + const uint8_t *buf, + size_t len, + size_t batch_size, + bool allow_comma_separated + ) noexcept; + + /** + * Parse the first document in the buffer. Used by begin(), to handle allocation and + * initialization. + */ + inline void start() noexcept; + + /** + * Parse the next document found in the buffer previously given to document_stream. + * + * The content should be a valid JSON document encoded as UTF-8. If there is a + * UTF-8 BOM, the parser skips it. + * + * You do NOT need to pre-allocate a parser. This function takes care of + * pre-allocating a capacity defined by the batch_size defined when creating the + * document_stream object. + * + * The function returns simdjson::EMPTY if there is no more data to be parsed. + * + * The function returns simdjson::SUCCESS (as integer = 0) in case of success + * and indicates that the buffer has successfully been parsed to the end. + * Every document it contained has been parsed without error. + * + * The function returns an error code from simdjson/simdjson.h in case of failure + * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth; + * the simdjson::error_message function converts these error codes into a string). + * + * You can also check validity by calling parser.is_valid(). The same parser can + * and should be reused for the other documents in the buffer. + */ + inline void next() noexcept; + + /** Move the json_iterator of the document to the location of the next document in the stream. */ + inline void next_document() noexcept; + + /** Get the next document index. 
*/ + inline size_t next_batch_start() const noexcept; + + /** Pass the next batch through stage 1 with the given parser. */ + inline error_code run_stage1(ondemand::parser &p, size_t batch_start) noexcept; + + // Fields + ondemand::parser *parser; + const uint8_t *buf; + size_t len; + size_t batch_size; + bool allow_comma_separated; + /** + * We are going to use just one document instance. The document owns + * the json_iterator. It implies that we only ever pass a reference + * to the document to the users. + */ + document doc{}; + /** The error (or lack thereof) from the current document. */ + error_code error; + size_t batch_start{0}; + size_t doc_index{}; + + #ifdef SIMDJSON_THREADS_ENABLED + /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ + bool use_thread; + + inline void load_from_stage1_thread() noexcept; + + /** Start a thread to run stage 1 on the next batch. */ + inline void start_stage1_thread() noexcept; + + /** Wait for the stage 1 thread to finish and capture the results. */ + inline void finish_stage1_thread() noexcept; + + /** The error returned from the stage 1 thread. */ + error_code stage1_thread_error{UNINITIALIZED}; + /** The thread used to run stage 1 against the next batch in the background. */ + std::unique_ptr worker{new(std::nothrow) stage1_worker()}; + /** + * The parser used to run stage 1 in the background. Will be swapped + * with the regular parser when finished. 
+ */ + ondemand::parser stage1_thread_parser{}; + + friend struct stage1_worker; + #endif // SIMDJSON_THREADS_ENABLED + + friend class parser; + friend class document; + friend class json_iterator; + friend struct simdjson_result; + friend struct internal::simdjson_result_base; +}; // document_stream + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(ppc64::ondemand::document_stream &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H +/* end file simdjson/generic/ondemand/document_stream.h for ppc64 */ +/* including simdjson/generic/ondemand/field.h for ppc64: #include "simdjson/generic/ondemand/field.h" */ +/* begin file simdjson/generic/ondemand/field.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +/** + * A JSON field (key/value pair) in an object. + * + * Returned from object iteration. + * + * Extends from std::pair so you can use C++ algorithms that rely on pairs. 
+ */ +class field : public std::pair { +public: + /** + * Create a new invalid field. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline field() noexcept; + + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + /** + * Get the key as a raw_json_string. Can be used for direct comparison with + * an unescaped C string: e.g., key() == "test". + */ + simdjson_inline raw_json_string key() const noexcept; + /** + * Get the unprocessed key as a string_view. This includes the quotes and may include + * some spaces after the last quote. + */ + simdjson_inline std::string_view key_raw_json_token() const noexcept; + /** + * Get the key as a string_view. This does not include the quotes and + * the string is unprocessed key so it may contain escape characters + * (e.g., \uXXXX or \n). Use unescaped_key() to get the unescaped key. + */ + simdjson_inline std::string_view escaped_key() const noexcept; + /** + * Get the field value. 
+ */ + simdjson_inline ondemand::value &value() & noexcept; + /** + * @overload ondemand::value &ondemand::value() & noexcept + */ + simdjson_inline ondemand::value value() && noexcept; + +protected: + simdjson_inline field(raw_json_string key, ondemand::value &&value) noexcept; + static simdjson_inline simdjson_result start(value_iterator &parent_iter) noexcept; + static simdjson_inline simdjson_result start(const value_iterator &parent_iter, raw_json_string key) noexcept; + friend struct simdjson_result; + friend class object_iterator; +}; + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(ppc64::ondemand::field &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result key() noexcept; + simdjson_inline simdjson_result key_raw_json_token() noexcept; + simdjson_inline simdjson_result escaped_key() noexcept; + simdjson_inline simdjson_result value() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_H +/* end file simdjson/generic/ondemand/field.h for ppc64 */ +/* including simdjson/generic/ondemand/object.h for ppc64: #include "simdjson/generic/ondemand/object.h" */ +/* begin file simdjson/generic/ondemand/object.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +/** + * A forward-only JSON object field iterator. + */ +class object { +public: + /** + * Create a new invalid object. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline object() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. 
+ * + * You are expected to access keys only once. You should access the value corresponding to a + * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. + * + * If you expect to have keys with escape characters, please review our documentation. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field was not there when they are not in order). + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. 
The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. + * + * If you expect to have keys with escape characters, please review our documentation. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. 
+ * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an object + * instance: there is no rewind and no invalidation. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. 
+ * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Reset the iterator so that we are pointing back at the + * beginning of the object. You should still consume values only once even if you + * can iterate through the object more than once. If you unescape a string within + * the object more than once, you have unsafe code. Note that rewinding an object + * means that you may need to reparse it anew: it is not a free operation. + * + * @returns true if the object contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * This method scans the beginning of the object and checks whether the + * object is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + inline simdjson_result is_empty() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. 
+ * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Consumes the object and returns a string_view instance corresponding to the + * object as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + +protected: + /** + * Go to the end of the object, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + static simdjson_inline object resume(const value_iterator &iter) noexcept; + simdjson_inline object(const value_iterator &iter) noexcept; + + simdjson_warn_unused simdjson_inline error_code find_field_raw(const std::string_view key) noexcept; + + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(ppc64::ondemand::object &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + 
simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + + inline simdjson_result reset() noexcept; + inline simdjson_result is_empty() noexcept; + inline simdjson_result count_fields() & noexcept; + inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_H +/* end file simdjson/generic/ondemand/object.h for ppc64 */ +/* including simdjson/generic/ondemand/object_iterator.h for ppc64: #include "simdjson/generic/ondemand/object_iterator.h" */ +/* begin file simdjson/generic/ondemand/object_iterator.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +class object_iterator { +public: + /** + * Create a new invalid object_iterator. + * + * Exists so you can declare a variable and later assign to it before use. 
+ */ + simdjson_inline object_iterator() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result operator*() noexcept; + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const object_iterator &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. + simdjson_inline bool operator!=(const object_iterator &) const noexcept; + // Checks for ']' and ',' + simdjson_inline object_iterator &operator++() noexcept; + +private: + /** + * The underlying JSON iterator. + * + * PERF NOTE: expected to be elided in favor of the parent document: this is set when the object + * is first used, and never changes afterwards. + */ + value_iterator iter{}; + + simdjson_inline object_iterator(const value_iterator &iter) noexcept; + friend struct simdjson_result; + friend class object; +}; + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(ppc64::ondemand::object_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. 
+ simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + // Checks for ']' and ',' + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H +/* end file simdjson/generic/ondemand/object_iterator.h for ppc64 */ +/* including simdjson/generic/ondemand/serialization.h for ppc64: #include "simdjson/generic/ondemand/serialization.h" */ +/* begin file simdjson/generic/ondemand/serialization.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Create a string-view instance out of a document instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(ppc64::ondemand::document& x) noexcept; +/** + * Create a string-view instance out of a value instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. The value must + * not have been accessed previously. It does not + * validate the content. + */ +inline simdjson_result to_json_string(ppc64::ondemand::value& x) noexcept; +/** + * Create a string-view instance out of an object instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(ppc64::ondemand::object& x) noexcept; +/** + * Create a string-view instance out of an array instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. 
It does not + * validate the content. + */ +inline simdjson_result to_json_string(ppc64::ondemand::array& x) noexcept; +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +} // namespace simdjson + +/** + * We want to support argument-dependent lookup (ADL). + * Hence we should define operator<< in the namespace + * where the argument (here value, object, etc.) resides. + * Credit: @madhur4127 + * See https://github.com/simdjson/simdjson/issues/1768 + */ +namespace simdjson { namespace ppc64 { namespace ondemand { + +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The element. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::value x); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::array value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. 
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::document& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::document_reference& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The object. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::object value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +}}} // namespace simdjson::ppc64::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H +/* end file simdjson/generic/ondemand/serialization.h for ppc64 */ + +// Inline definitions +/* including simdjson/generic/ondemand/array-inl.h for ppc64: #include "simdjson/generic/ondemand/array-inl.h" */ +/* begin file simdjson/generic/ondemand/array-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): 
#include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the array is first found and the iterator is just past the `[`. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the `,` before the next value (or `]`). In this state, +// depth == iter->depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter->depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the array iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an +// array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter->depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter->depth == depth, and at_start == false. +// +// ## Terminal State +// +// The terminal state has iter->depth < depth. at_start is always false.
+// +// - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this +// by decrementing depth. In this state, iter->depth < depth, at_start == false, and +// error == SUCCESS. +// + +simdjson_inline array::array(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { + // We don't need to know if the array is empty to start iteration, but we do want to know if there + // is an error--thus `simdjson_unused`. + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_root_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { + bool has_value; + SIMDJSON_TRY(iter.started_array().get(has_value)); + return array(iter); +} + +simdjson_inline simdjson_result array::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return array_iterator(iter); +} +simdjson_inline simdjson_result array::end() noexcept { + return array_iterator(iter); +} +simdjson_inline error_code array::consume() noexcept { + auto error = iter.json_iter().skip_child(iter.depth()-1); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result array::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. 
+ const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline simdjson_result array::count_elements() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the array after counting the number of elements. + iter.reset_array(); + return count; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline simdjson_result array::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_array().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +inline simdjson_result array::reset() & noexcept { + return iter.reset_array(); +} + +inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + // - means "the append position" or "the element after the end of the array" + // We don't support this, because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. 
If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + // Get the child + auto child = at(array_index); + // If there is an error, it ends here + if(child.error()) { + return child; + } + + // If there is a /, we're not done yet, call recursively. + if (i < json_pointer.length()) { + child = child.at_pointer(json_pointer.substr(i)); + } + return child; +} + +inline std::string json_path_to_pointer_conversion(std::string_view json_path) { + if (json_path.empty() || (json_path.front() != '.' && + json_path.front() != '[')) { + return "-1"; // This is just a sentinel value, the caller should check for this and return an error. + } + + std::string result; + // Reserve space to reduce allocations, adjusting for potential increases due + // to escaping. + result.reserve(json_path.size() * 2); + + size_t i = 0; + + while (i < json_path.length()) { + if (json_path[i] == '.') { + result += '/'; + } else if (json_path[i] == '[') { + result += '/'; + ++i; // Move past the '[' + while (i < json_path.length() && json_path[i] != ']') { + if (json_path[i] == '~') { + result += "~0"; + } else if (json_path[i] == '/') { + result += "~1"; + } else { + result += json_path[i]; + } + ++i; + } + if (i == json_path.length() || json_path[i] != ']') { + return "-1"; // Using sentinel value that will be handled as an error by the caller. 
+ } + } else { + if (json_path[i] == '~') { + result += "~0"; + } else if (json_path[i] == '/') { + result += "~1"; + } else { + result += json_path[i]; + } + } + ++i; + } + + return result; +} + +inline simdjson_result array::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { return INVALID_JSON_POINTER; } + return at_pointer(json_pointer); +} + +simdjson_inline simdjson_result array::at(size_t index) noexcept { + size_t i = 0; + for (auto value : *this) { + if (i == index) { return value; } + i++; + } + return INDEX_OUT_OF_BOUNDS; +} + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + ppc64::ondemand::array &&value +) noexcept + : implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept + : implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { + if (error()) { return error(); } + return first.is_empty(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) 
noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H +/* end file simdjson/generic/ondemand/array-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/array_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/array_iterator-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result array_iterator::operator*() noexcept { + if (iter.error()) { iter.abandon(); return iter.error(); } + return value(iter.child()); +} +simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { + return iter.is_open(); +} +simdjson_inline array_iterator &array_iterator::operator++() noexcept { + error_code error; + // PERF NOTE this is a safety rail ... 
users should exit loops as soon as they receive an error, so we'll never get here. + // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. + if (( error = iter.error() )) { return *this; } + if (( error = iter.skip_child() )) { return *this; } + if (( error = iter.has_next_element().error() )) { return *this; } + return *this; +} + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + ppc64::ondemand::array_iterator &&value +) noexcept + : ppc64::implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : ppc64::implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++(first); + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/array_iterator-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/document-inl.h for ppc64: #include "simdjson/generic/ondemand/document-inl.h" */ +/* begin file simdjson/generic/ondemand/document-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H + +/* amalgamation skipped (editor-only): #ifndef 
SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept + : iter{std::forward(_iter)} +{ + logger::log_start_value(iter, "document"); +} + +simdjson_inline document document::start(json_iterator &&iter) noexcept { + return document(std::forward(iter)); +} + +inline void document::rewind() noexcept { + iter.rewind(); +} + +inline std::string document::to_debug_string() noexcept { + return iter.to_string(); +} + +inline simdjson_result document::current_location() const noexcept { + return iter.current_location(); +} + +inline 
int32_t document::current_depth() const noexcept { + return iter.depth(); +} + +inline bool document::at_end() const noexcept { + return iter.at_end(); +} + + +inline bool document::is_alive() noexcept { + return iter.is_alive(); +} +simdjson_inline value_iterator document::resume_value_iterator() noexcept { + return value_iterator(&iter, 1, iter.root_position()); +} +simdjson_inline value_iterator document::get_root_value_iterator() noexcept { + return resume_value_iterator(); +} +simdjson_inline simdjson_result document::start_or_resume_object() noexcept { + if (iter.at_root()) { + return get_object(); + } else { + return object::resume(resume_value_iterator()); + } +} +simdjson_inline simdjson_result document::get_value() noexcept { + // Make sure we start any arrays or objects before returning, so that start_root_() + // gets called. + + // It is the convention throughout the code that the macro `SIMDJSON_DEVELOPMENT_CHECKS` determines whether + // we check for OUT_OF_ORDER_ITERATION. Proper on::demand code should never trigger this error. +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.at_root()) { return OUT_OF_ORDER_ITERATION; } +#endif + // assert_at_root() serves two purposes: in Debug mode, whether or not + // SIMDJSON_DEVELOPMENT_CHECKS is set or not, it checks that we are at the root of + // the document (this will typically be redundant). In release mode, it generates + // SIMDJSON_ASSUME statements to allow the compiler to make assumptions. + iter.assert_at_root(); + switch (*iter.peek()) { + case '[': { + // The following lines check that the document ends with ]. + auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_array(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + case '{': { + // The following lines would check that the document ends with }. 
+ auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_object(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + default: + // Unfortunately, scalar documents are a special case in simdjson and they cannot + // be safely converted to value instances. + return SCALAR_DOCUMENT_AS_VALUE; + } +} +simdjson_inline simdjson_result document::get_array() & noexcept { + auto value = get_root_value_iterator(); + return array::start_root(value); +} +simdjson_inline simdjson_result document::get_object() & noexcept { + auto value = get_root_value_iterator(); + return object::start_root(value); +} + +/** + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. We want to disallow trailing + * content. + * Thus, in several implementations below, we pass a 'true' parameter value to + * the accessor invoked on get_root_value_iterator() (e.g. get_root_uint64): this indicates that we disallow trailing content.
+ */ + +simdjson_inline simdjson_result document::get_uint64() noexcept { + return get_root_value_iterator().get_root_uint64(true); +} +simdjson_inline simdjson_result document::get_uint64_in_string() noexcept { + return get_root_value_iterator().get_root_uint64_in_string(true); +} +simdjson_inline simdjson_result document::get_int64() noexcept { + return get_root_value_iterator().get_root_int64(true); +} +simdjson_inline simdjson_result document::get_int64_in_string() noexcept { + return get_root_value_iterator().get_root_int64_in_string(true); +} +simdjson_inline simdjson_result document::get_double() noexcept { + return get_root_value_iterator().get_root_double(true); +} +simdjson_inline simdjson_result document::get_double_in_string() noexcept { + return get_root_value_iterator().get_root_double_in_string(true); +} +simdjson_inline simdjson_result document::get_string(bool allow_replacement) noexcept { + return get_root_value_iterator().get_root_string(true, allow_replacement); +} +template +simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { + return get_root_value_iterator().get_root_string(receiver, true, allow_replacement); +} +simdjson_inline simdjson_result document::get_wobbly_string() noexcept { + return get_root_value_iterator().get_root_wobbly_string(true); +} +simdjson_inline simdjson_result document::get_raw_json_string() noexcept { + return get_root_value_iterator().get_root_raw_json_string(true); +} +simdjson_inline simdjson_result document::get_bool() noexcept { + return get_root_value_iterator().get_root_bool(true); +} +simdjson_inline simdjson_result document::is_null() noexcept { + return get_root_value_iterator().is_root_null(true); +} + +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document::get() & 
noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } + +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } + +template simdjson_inline error_code document::get(T &out) & noexcept { + return get().get(out); +} +template simdjson_inline error_code document::get(T &out) && noexcept { + return std::forward(*this).get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline document::operator T() noexcept(false) { return get(); } +simdjson_inline document::operator array() & noexcept(false) { return get_array(); } +simdjson_inline document::operator object() & noexcept(false) { return get_object(); } +simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } 
+simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document::operator double() noexcept(false) { return get_double(); } +simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document::operator value() noexcept(false) { return get_value(); } + +#endif +simdjson_inline simdjson_result document::count_elements() & noexcept { + auto a = get_array(); + simdjson_result answer = a.count_elements(); + /* If there was an array, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::count_fields() & noexcept { + auto a = get_object(); + simdjson_result answer = a.count_fields(); + /* If there was an object, we are now left pointing at its first element. 
*/ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::at(size_t index) & noexcept { + auto a = get_array(); + return a.at(index); +} +simdjson_inline simdjson_result document::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result document::end() & noexcept { + return {}; +} + +simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field(const char *key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline error_code document::consume() noexcept { + auto error = iter.skip_child(0); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result document::raw_json() noexcept { + auto _iter = get_root_value_iterator(); + const uint8_t * starting_point{_iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. 
+ const uint8_t * final_point{iter.unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result document::type() noexcept { + return get_root_value_iterator().type(); +} + +simdjson_inline simdjson_result document::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline simdjson_result document::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + +simdjson_inline bool document::is_negative() noexcept { + return get_root_value_iterator().is_root_negative(); +} + +simdjson_inline simdjson_result document::is_integer() noexcept { + return get_root_value_iterator().is_root_integer(true); +} + +simdjson_inline simdjson_result document::get_number_type() noexcept { + return get_root_value_iterator().get_root_number_type(true); +} + +simdjson_inline simdjson_result document::get_number() noexcept { + return get_root_value_iterator().get_root_number(true); +} + + +simdjson_inline simdjson_result document::raw_json_token() noexcept { + auto _iter = get_root_value_iterator(); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); +} + +simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_pointer.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result 
document::at_path(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_path is called + if (json_path.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + ppc64::ondemand::document &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base( + error + ) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return
first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return 
first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first); +} +template<> simdjson_inline error_code simdjson_result::get(ppc64::ondemand::document &out) & noexcept = delete; +template<> simdjson_inline error_code 
simdjson_result::get(ppc64::ondemand::document &out) && noexcept { + if (error()) { return error(); } + out = std::forward(first); + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} + +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} + +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} + +simdjson_inline bool simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} + +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} + +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} + +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} + + +#if SIMDJSON_EXCEPTIONS +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator ppc64::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator ppc64::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) 
{ + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator ppc64::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator ppc64::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline bool simdjson_result::at_end() const noexcept { + if (error()) { return error(); } + return first.at_end(); +} + + +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} + +} // namespace simdjson + + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} +simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} +simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } +simdjson_inline 
simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } +simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } +/** + * The document_reference instances are used primarily/solely for streams of JSON + * documents. + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. + * + * However, for streams of JSON documents, we want to be able to start from + * "321" "321" "321" + * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() + * successfully each time. + * + * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: + * this indicates that we allow trailing content. + */ +simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } +simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } +simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } +template +simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) 
noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } +simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } +simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } +simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } +simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } +simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_value(); } +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline document_reference::operator T() noexcept(false) { return get(); } +simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } +simdjson_inline 
document_reference::operator object() & noexcept(false) { return object(*doc); } +simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } +simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } +#endif +simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } +simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } +simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } +simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } +simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } +simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result 
document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } +simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } +simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } +simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } +simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } +simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } +simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } +simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } +simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } +simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } +simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} +simdjson_inline document_reference::operator document&() const noexcept { return *doc; } + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + + + +namespace simdjson { +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::document_reference value, error_code error) + noexcept : 
implementation_simdjson_result_base(std::forward(value), error) {} + + +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return 
first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if 
(error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator ppc64::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator ppc64::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline 
simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator ppc64::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator ppc64::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H +/* end file simdjson/generic/ondemand/document-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/document_stream-inl.h for ppc64: #include "simdjson/generic/ondemand/document_stream-inl.h" */ +/* begin file simdjson/generic/ondemand/document_stream-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ 
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. + std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); +} + +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); +} + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). 
+ if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); +} + + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } +} + +inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); +} + +#endif // SIMDJSON_THREADS_ENABLED + +simdjson_inline document_stream::document_stream( + ondemand::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size, + bool _allow_comma_separated +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? 
MINIMAL_BATCH_SIZE : _batch_size}, + allow_comma_separated{_allow_comma_separated}, + error{SUCCESS} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change + #endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif +} + +simdjson_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + allow_comma_separated{false}, + error{UNINITIALIZED} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) + #endif +{ +} + +simdjson_inline document_stream::~document_stream() noexcept +{ + #ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); + #endif +} + +inline size_t document_stream::size_in_bytes() const noexcept { + return len; +} + +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +} + +simdjson_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { +} + +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { +} + +simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { + //if(stream->error) { return stream->error; } + return simdjson_result(stream->doc, stream->error); +} + +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) 
+ // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. + // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->error() is guarded against error conditions + // (it will immediately return if stream->error casts to false). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; +} + +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; +} + +simdjson_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. 
+ return iterator(this, error == EMPTY); +} + +simdjson_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); +} + +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->allocate(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } + doc_index = batch_start; + doc = document(json_iterator(&buf[batch_start], parser)); + doc.iter._streaming = true; + + #ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread on next batch if needed + error = stage1_thread_parser.allocate(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } + #endif // SIMDJSON_THREADS_ENABLED +} + +inline void document_stream::next() noexcept { + // We always enter at once once in an error condition. + if (error) { return; } + next_document(); + if (error) { return; } + auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); + doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; + + // Check if at end of structural indexes (i.e. 
at end of batch) + if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { + error = EMPTY; + // Load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + #ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } + #else + error = run_stage1(*parser, batch_start); + #endif + /** + * Whenever we move to another window, we need to update all pointers to make + * it appear as if the input buffer started at the beginning of the window. + * + * Take this input: + * + * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] + * + * Say you process the following window... + * + * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' + * + * When you do so, the json_iterator has a pointer at the beginning of the memory region + * (pointing at the beginning of '{"z"...'. + * + * When you move to the window that starts at... + * + * '[7, 10, 9] [15, 11, 12, 13] ... + * + * then it is not sufficient to just run stage 1. You also need to re-anchor the + * json_iterator so that it believes we are starting at '[7, 10, 9]...'. + * + * Under the DOM front-end, this gets done automatically because the parser owns + * the pointer the data, and when you call stage1 and then stage2 on the same + * parser, then stage2 will run on the pointer acquired by stage1. + * + * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that + * we used. But json_iterator has no callback when stage1 is called on the parser. + * In fact, I think that the parser is unaware of json_iterator. + * + * + * So we need to re-anchor the json_iterator after each call to stage 1 so that + * all of the pointers are in sync. + */ + doc.iter = json_iterator(&buf[batch_start], parser); + doc.iter._streaming = true; + /** + * End of resync. 
+       */
+
+      if (error) { continue; } // If the error was EMPTY, we may want to load another batch.
+      doc_index = batch_start;
+    }
+  }
+}
+
+// Skips whatever is left of the current document and re-bases the iterator on the next one.
+inline void document_stream::next_document() noexcept {
+  // Go to next place where depth=0 (document depth)
+  error = doc.iter.skip_child(0);
+  if (error) { return; }
+  // Always set depth=1 at the start of document
+  doc.iter._depth = 1;
+  // consume comma if comma separated is allowed
+  if (allow_comma_separated) { doc.iter.consume_character(','); }
+  // Resets the string buffer at the beginning, thus invalidating the strings.
+  doc.iter._string_buf_loc = parser->string_buf.get();
+  doc.iter._root = doc.iter.position();
+}
+
+// Byte offset (in the whole input) at which the batch following the current one begins.
+inline size_t document_stream::next_batch_start() const noexcept {
+  return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes];
+}
+
+// Runs stage 1 on the batch starting at _batch_start; the last batch is flagged as final.
+inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept {
+  // This code only updates the structural index in the parser, it does not update any json_iterator
+  // instance.
+  size_t remaining = len - _batch_start;
+  if (remaining <= batch_size) {
+    return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final);
+  } else {
+    return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial);
+  }
+}
+
+simdjson_inline size_t document_stream::iterator::current_index() const noexcept {
+  return stream->doc_index;
+}
+
+// Returns a view over the raw text of the current document inside the stream buffer.
+simdjson_inline std::string_view document_stream::iterator::source() const noexcept {
+  auto depth = stream->doc.iter.depth();
+  auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get();
+
+  // If at root, process the first token to determine if scalar value
+  if (stream->doc.iter.at_root()) {
+    switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) {
+      case '{': case '[':   // Depth=1 already at start of document
+        break;
+      case '}': case ']':
+        depth--;
+        break;
+      default:    // Scalar value document
+        // TODO: We could remove trailing whitespaces
+        // This returns a string spanning from start of value to the beginning of the next document (excluded)
+        {
+          auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index];
+          // normally the length would be next_index - current_index() - 1, except for the last document
+          size_t svlen = next_index - current_index();
+          const char *start = reinterpret_cast<const char*>(stream->buf) + current_index();
+          // std::isspace requires a value representable as unsigned char: cast to avoid
+          // undefined behavior on bytes >= 0x80 when char is signed.
+          while(svlen > 1 && (std::isspace(static_cast<unsigned char>(start[svlen-1])) || start[svlen-1] == '\0')) {
+            svlen--;
+          }
+          return std::string_view(start, svlen);
+        }
+    }
+    cur_struct_index++;
+  }
+
+  // Walk the structurals until the brackets opened by this document are balanced again.
+  while (cur_struct_index <= static_cast<int64_t>(stream->parser->implementation->n_structural_indexes)) {
+    switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) {
+      case '{': case '[':
+        depth++;
+        break;
+      case '}': case ']':
+        depth--;
+        break;
+    }
+    if (depth == 0) { break; }
+    cur_struct_index++;
+  }
+
+  return std::string_view(reinterpret_cast<const char*>(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);
+}
+
+inline error_code document_stream::iterator::error() const noexcept {
+  return stream->error;
+}
+
+#ifdef SIMDJSON_THREADS_ENABLED
+
+inline void document_stream::load_from_stage1_thread() noexcept {
+  worker->finish();
+  // Swap to the parser that was loaded up in the thread. Make sure the parser has
+  // enough memory to swap to, as well.
+  std::swap(stage1_thread_parser,*parser);
+  error = stage1_thread_error;
+  if (error) { return; }
+
+  // If there's anything left, start the stage 1 thread!
+  if (next_batch_start() < len) {
+    start_stage1_thread();
+  }
+}
+
+inline void document_stream::start_stage1_thread() noexcept {
+  // we call the thread on a lambda that will update
+  // this->stage1_thread_error
+  // there is only one thread that may write to this value
+  // TODO this is NOT exception-safe.
+  this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error
+  size_t _next_batch_start = this->next_batch_start();
+
+  worker->run(this, & this->stage1_thread_parser, _next_batch_start);
+}
+
+#endif // SIMDJSON_THREADS_ENABLED
+
+} // namespace ondemand
+} // namespace ppc64
+} // namespace simdjson
+
+namespace simdjson {
+
+simdjson_inline simdjson_result<ppc64::ondemand::document_stream>::simdjson_result(
+  error_code error
+) noexcept :
+    implementation_simdjson_result_base<ppc64::ondemand::document_stream>(error)
+{
+}
+simdjson_inline simdjson_result<ppc64::ondemand::document_stream>::simdjson_result(
+  ppc64::ondemand::document_stream &&value
+) noexcept :
+    implementation_simdjson_result_base<ppc64::ondemand::document_stream>(
+      std::forward<ppc64::ondemand::document_stream>(value)
+    )
+{
+}
+
+}
+
+#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H
+/* end file simdjson/generic/ondemand/document_stream-inl.h for ppc64 */
+/* including simdjson/generic/ondemand/field-inl.h for ppc64: #include "simdjson/generic/ondemand/field-inl.h" */
+/* begin file simdjson/generic/ondemand/field-inl.h for ppc64 */
+#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace ppc64 {
+namespace ondemand {
+
+// clang 6 does not think the default constructor can be noexcept, so we make it explicit
+simdjson_inline field::field() noexcept : std::pair<raw_json_string, ondemand::value>() {}
+
+simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept
+  : std::pair<raw_json_string, ondemand::value>(key, std::forward<ondemand::value>(value))
+{
+}
+
+// Reads the key and positions on the value, then builds the field from the parent iterator.
+simdjson_inline simdjson_result<field> field::start(value_iterator &parent_iter) noexcept {
+  raw_json_string key;
+  SIMDJSON_TRY( parent_iter.field_key().get(key) );
+  SIMDJSON_TRY( parent_iter.field_value() );
+  return field::start(parent_iter, key);
+}
+
+simdjson_inline simdjson_result<field> field::start(const value_iterator &parent_iter, raw_json_string key) noexcept {
+    return field(key, parent_iter.child());
+}
+
+// Unescapes the key into the parser's string buffer; the raw key is consumed afterwards.
+simdjson_inline simdjson_warn_unused simdjson_result<std::string_view> field::unescaped_key(bool allow_replacement) noexcept {
+  SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us.
+  simdjson_result<std::string_view> answer = first.unescape(second.iter.json_iter(), allow_replacement);
+  first.consume();
+  return answer;
+}
+
+simdjson_inline raw_json_string field::key() const noexcept {
+  SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us.
+  return first;
+}
+
+
+// View starting one byte before the key buffer and running up to the previous token (the ':').
+simdjson_inline std::string_view field::key_raw_json_token() const noexcept {
+  SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us.
+  return std::string_view(reinterpret_cast<const char*>(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1);
+}
+
+// View over the key bytes up to (excluding) the closing quote, escapes left untouched.
+simdjson_inline std::string_view field::escaped_key() const noexcept {
+  SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us.
+  auto end_quote = second.iter._json_iter->token.peek(-1);
+  // Scan backwards from the previous token to find the quote that closes the key.
+  while(*end_quote != '"') end_quote--;
+  return std::string_view(reinterpret_cast<const char*>(first.buf), end_quote - first.buf);
+}
+
+simdjson_inline value &field::value() & noexcept {
+  return second;
+}
+
+simdjson_inline value field::value() && noexcept {
+  return std::forward<field>(*this).second;
+}
+
+} // namespace ondemand
+} // namespace ppc64
+} // namespace simdjson
+
+namespace simdjson {
+
+simdjson_inline simdjson_result<ppc64::ondemand::field>::simdjson_result(
+  ppc64::ondemand::field &&value
+) noexcept :
+    implementation_simdjson_result_base<ppc64::ondemand::field>(
+      std::forward<ppc64::ondemand::field>(value)
+    )
+{
+}
+simdjson_inline simdjson_result<ppc64::ondemand::field>::simdjson_result(
+  error_code error
+) noexcept :
+    implementation_simdjson_result_base<ppc64::ondemand::field>(error)
+{
+}
+
+simdjson_inline simdjson_result<ppc64::ondemand::raw_json_string> simdjson_result<ppc64::ondemand::field>::key() noexcept {
+  if (error()) { return error(); }
+  return first.key();
+}
+
+simdjson_inline simdjson_result<std::string_view> simdjson_result<ppc64::ondemand::field>::key_raw_json_token() noexcept {
+  if (error()) { return error(); }
+  return first.key_raw_json_token();
+}
+
+simdjson_inline simdjson_result<std::string_view> simdjson_result<ppc64::ondemand::field>::escaped_key() noexcept {
+  if (error()) { return error(); }
+  return first.escaped_key();
+}
+
+simdjson_inline simdjson_result<std::string_view> simdjson_result<ppc64::ondemand::field>::unescaped_key(bool allow_replacement) noexcept {
+  if (error()) { return error(); }
+  return first.unescaped_key(allow_replacement);
+}
+
+simdjson_inline simdjson_result<ppc64::ondemand::value> simdjson_result<ppc64::ondemand::field>::value() noexcept {
+  if (error()) { return error(); }
+  return std::move(first.value());
+}
+
+} // namespace simdjson
+
+#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H
+/* end file simdjson/generic/ondemand/field-inl.h for ppc64 */
+/* including simdjson/generic/ondemand/json_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/json_iterator-inl.h" */
+/* begin file simdjson/generic/ondemand/json_iterator-inl.h for ppc64 */
+#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */
+/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace ppc64 {
+namespace ondemand {
+
+// Move construction: the source iterator is left without a parser (see is_alive()).
+simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept
+  : token(std::forward<token_iterator>(other.token)),
+    parser{other.parser},
+    _string_buf_loc{other._string_buf_loc},
+    error{other.error},
+    _depth{other._depth},
+    _root{other._root},
+    _streaming{other._streaming}
+{
+  other.parser = nullptr;
+}
+simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept {
+  token = other.token;
+  parser = other.parser;
+  _string_buf_loc = other._string_buf_loc;
+  error = other.error;
+  _depth = other._depth;
+  _root = other._root;
+  _streaming = other._streaming;
+  other.parser = nullptr;
+  return *this;
+}
+
+simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept
+  : token(buf, &_parser->implementation->structural_indexes[0]),
+    parser{_parser},
+    _string_buf_loc{parser->string_buf.get()},
+    _depth{1},
+    _root{parser->implementation->structural_indexes.get()},
+    _streaming{false}
+
+{
+  logger::log_headers();
+#if SIMDJSON_CHECK_EOF
+  assert_more_tokens();
+#endif
+}
+
+#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON
+simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept
+    : token(buf, &_parser->implementation->structural_indexes[0]),
+      parser{_parser},
+      _string_buf_loc{parser->string_buf.get()},
+      _depth{1},
+      _root{parser->implementation->structural_indexes.get()},
+      _streaming{streaming}
+
+{
+  logger::log_headers();
+#if SIMDJSON_CHECK_EOF
+  assert_more_tokens();
+#endif
+}
+#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON
+
+// Puts the iterator back at the root token with a fresh string buffer and depth 1.
+inline void json_iterator::rewind() noexcept {
+  token.set_position( root_position() );
+  logger::log_headers(); // We start again
+  _string_buf_loc = parser->string_buf.get();
+  _depth = 1;
+}
+
+// Returns true when every '[' / '{' from the root to the last token has a matching closer
+// (only the counts are compared, not the bracket kinds).
+inline bool json_iterator::balanced() const noexcept {
+  token_iterator ti(token);
+  int32_t count{0};
+  ti.set_position( root_position() );
+  while(ti.peek() <= peek_last()) {
+    switch (*ti.return_current_and_advance())
+    {
+    case '[': case '{':
+      count++;
+      break;
+    case ']': case '}':
+      count--;
+      break;
+    default:
+      break;
+    }
+  }
+  return count == 0;
+}
+
+
+// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller
+// relating depth and parent_depth, which is a desired effect. The warning does not show up if the
+// skip_child() function is not marked inline).
+SIMDJSON_PUSH_DISABLE_WARNINGS
+SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING
+// Advances past the current value until depth() drops to parent_depth or below.
+// Returns SUCCESS once that depth is reached, TAPE_ERROR if the tokens run out first.
+simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept {
+  if (depth() <= parent_depth) { return SUCCESS; }
+  switch (*return_current_and_advance()) {
+    // TODO consider whether matching braces is a requirement: if non-matching braces indicates
+    // *missing* braces, then future lookups are not in the object/arrays they think they are,
+    // violating the rule "validate enough structure that the user can be confident they are
+    // looking at the right values."
+    // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth
+
+    // For the first open array/object in a value, we've already incremented depth, so keep it the same
+    // We never stop at colon, but if we did, it wouldn't affect depth
+    case '[': case '{': case ':':
+      logger::log_start_value(*this, "skip");
+      break;
+    // If there is a comma, we have just finished a value in an array/object, and need to get back in
+    case ',':
+      logger::log_value(*this, "skip");
+      break;
+    // ] or } means we just finished a value and need to jump out of the array/object
+    case ']': case '}':
+      logger::log_end_value(*this, "skip");
+      _depth--;
+      if (depth() <= parent_depth) { return SUCCESS; }
+#if SIMDJSON_CHECK_EOF
+      // If there are no more tokens, the parent is incomplete.
+      if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); }
+#endif // SIMDJSON_CHECK_EOF
+      break;
+    case '"':
+      if(*peek() == ':') {
+        // We are at a key!!!
+        // This might happen if you just started an object and you skip it immediately.
+        // Performance note: it would be nice to get rid of this check as it is somewhat
+        // expensive.
+        // https://github.com/simdjson/simdjson/issues/1742
+        logger::log_value(*this, "key");
+        return_current_and_advance(); // eat up the ':'
+        break; // important!!!
+      }
+      simdjson_fallthrough;
+    // Anything else must be a scalar value
+    default:
+      // For the first scalar, we will have incremented depth already, so we decrement it here.
+      logger::log_value(*this, "skip");
+      _depth--;
+      if (depth() <= parent_depth) { return SUCCESS; }
+      break;
+  }
+
+  // Now that we've considered the first value, we only increment/decrement for arrays/objects
+  while (position() < end_position()) {
+    switch (*return_current_and_advance()) {
+      case '[': case '{':
+        logger::log_start_value(*this, "skip");
+        _depth++;
+        break;
+      // TODO consider whether matching braces is a requirement: if non-matching braces indicates
+      // *missing* braces, then future lookups are not in the object/arrays they think they are,
+      // violating the rule "validate enough structure that the user can be confident they are
+      // looking at the right values."
+      // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth
+      case ']': case '}':
+        logger::log_end_value(*this, "skip");
+        _depth--;
+        if (depth() <= parent_depth) { return SUCCESS; }
+        break;
+      default:
+        logger::log_value(*this, "skip", "");
+        break;
+    }
+  }
+
+  return report_error(TAPE_ERROR, "not enough close braces");
+}
+
+SIMDJSON_POP_DISABLE_WARNINGS
+
+simdjson_inline bool json_iterator::at_root() const noexcept {
+  return position() == root_position();
+}
+
+simdjson_inline bool json_iterator::is_single_token() const noexcept {
+  return parser->implementation->n_structural_indexes == 1;
+}
+
+simdjson_inline bool json_iterator::streaming() const noexcept {
+  return _streaming;
+}
+
+simdjson_inline token_position json_iterator::root_position() const noexcept {
+  return _root;
+}
+
+simdjson_inline void json_iterator::assert_at_document_depth() const noexcept {
+  SIMDJSON_ASSUME( _depth == 1 );
+}
+
+simdjson_inline void json_iterator::assert_at_root() const noexcept {
+  SIMDJSON_ASSUME( _depth == 1 );
+#ifndef SIMDJSON_CLANG_VISUAL_STUDIO
+  // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument
+  // has side effects that will be discarded.
+  SIMDJSON_ASSUME( token.position() == _root );
+#endif
+}
+
+simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept {
+  assert_valid_position(token._position + required_tokens - 1);
+}
+
+simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept {
+#ifndef SIMDJSON_CLANG_VISUAL_STUDIO
+  SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] );
+  SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] );
+#endif
+}
+
+simdjson_inline bool json_iterator::at_end() const noexcept {
+  return position() == end_position();
+}
+// One past the last structural index (i.e. &structural_indexes[n_structural_indexes]).
+simdjson_inline token_position json_iterator::end_position() const noexcept {
+  uint32_t n_structural_indexes{parser->implementation->n_structural_indexes};
+  return &parser->implementation->structural_indexes[n_structural_indexes];
+}
+
+// Human-readable dump of the iterator state (depth, current structural, offset, error).
+inline std::string json_iterator::to_string() const noexcept {
+  if( !is_alive() ) { return "dead json_iterator instance"; }
+  const char * current_structural = reinterpret_cast<const char *>(token.peek());
+  // The offset is not quoted, so no closing quote is emitted before ", error".
+  return std::string("json_iterator [ depth : ") + std::to_string(_depth)
+          + std::string(", structural : '") + std::string(current_structural,1)
+          + std::string("', offset : ") + std::to_string(token.current_offset())
+          + std::string(", error : ") + error_message(error)
+          + std::string(" ]");
+}
+
+inline simdjson_result<const char *> json_iterator::current_location() const noexcept {
+  if (!is_alive()) {    // Unrecoverable error
+    if (!at_root()) {
+      return reinterpret_cast<const char *>(token.peek(-1));
+    } else {
+      return reinterpret_cast<const char *>(token.peek());
+    }
+  }
+  if (at_end()) {
+    return OUT_OF_BOUNDS;
+  }
+  return reinterpret_cast<const char *>(token.peek());
+}
+
+simdjson_inline bool json_iterator::is_alive() const noexcept {
+  return parser;
+}
+
+// Detaches the iterator from its parser; is_alive() returns false afterwards.
+simdjson_inline void json_iterator::abandon() noexcept {
+  parser = nullptr;
+  _depth = 0;
+}
+
+simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept {
+#if SIMDJSON_CHECK_EOF
+  assert_more_tokens();
+#endif // SIMDJSON_CHECK_EOF
+  return token.return_current_and_advance();
+}
+
+simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept {
+  // deliberately done without safety guard:
+  return token.peek();
+}
+
+simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept {
+#if SIMDJSON_CHECK_EOF
+  assert_more_tokens(delta+1);
+#endif // SIMDJSON_CHECK_EOF
+  return token.peek(delta);
+}
+
+simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept {
+#if SIMDJSON_CHECK_EOF
+  assert_more_tokens(delta+1);
+#endif // #if SIMDJSON_CHECK_EOF
+  return token.peek_length(delta);
+}
+
+simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept {
+  // todo: currently we require end-of-string buffering, but the following
+  // assert_valid_position should be turned on if/when we lift that condition.
+  // assert_valid_position(position);
+  // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF
+  // is ON by default, we have no choice but to disable it for real with a comment.
+  return token.peek(position);
+}
+
+simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept {
+#if SIMDJSON_CHECK_EOF
+  assert_valid_position(position);
+#endif // SIMDJSON_CHECK_EOF
+  return token.peek_length(position);
+}
+simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept {
+#if SIMDJSON_CHECK_EOF
+  assert_valid_position(position);
+#endif // SIMDJSON_CHECK_EOF
+  return token.peek_root_length(position);
+}
+
+simdjson_inline token_position json_iterator::last_position() const noexcept {
+  // The following line fails under some compilers...
+  // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0);
+  // since it has side-effects.
+  uint32_t n_structural_indexes{parser->implementation->n_structural_indexes};
+  SIMDJSON_ASSUME(n_structural_indexes > 0);
+  return &parser->implementation->structural_indexes[n_structural_indexes - 1];
+}
+simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept {
+  return token.peek(last_position());
+}
+
+// Moves up exactly one level: the current depth must be parent_depth + 1.
+simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept {
+  SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1);
+  SIMDJSON_ASSUME(_depth == parent_depth + 1);
+  _depth = parent_depth;
+}
+
+// Moves down exactly one level: the current depth must be child_depth - 1.
+simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept {
+  SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX);
+  SIMDJSON_ASSUME(_depth == child_depth - 1);
+  _depth = child_depth;
+}
+
+simdjson_inline depth_t json_iterator::depth() const noexcept {
+  return _depth;
+}
+
+simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept {
+  return _string_buf_loc;
+}
+
+// Logs the message, stores _error in the iterator and returns it.
+simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept {
+  SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD);
+  logger::log_error(*this, message);
+  error = _error;
+  return error;
+}
+
+simdjson_inline token_position json_iterator::position() const noexcept {
+  return token.position();
+}
+
+simdjson_inline simdjson_result<std::string_view> json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept {
+  return parser->unescape(in, _string_buf_loc, allow_replacement);
+}
+
+simdjson_inline simdjson_result<std::string_view> json_iterator::unescape_wobbly(raw_json_string in) noexcept {
+  return parser->unescape_wobbly(in, _string_buf_loc);
+}
+
+// Jumps back to a previously visited token at child_depth (current depth must be child_depth - 1).
+simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept {
+  SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX);
+  SIMDJSON_ASSUME(_depth == child_depth - 1);
+#if SIMDJSON_DEVELOPMENT_CHECKS
+#ifndef SIMDJSON_CLANG_VISUAL_STUDIO
+  SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth());
+  SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]);
+#endif
+#endif
+  token.set_position(position);
+  _depth = child_depth;
+}
+
+// Consumes the next token if it is exactly c; otherwise leaves the position alone and returns TAPE_ERROR.
+simdjson_inline error_code json_iterator::consume_character(char c) noexcept {
+  if (*peek() == c) {
+    return_current_and_advance();
+    return SUCCESS;
+  }
+  return TAPE_ERROR;
+}
+
+#if SIMDJSON_DEVELOPMENT_CHECKS
+
+simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept {
+  SIMDJSON_ASSUME(size_t(depth) < parser->max_depth());
+  return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0;
+}
+
+simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept {
+  SIMDJSON_ASSUME(size_t(depth) < parser->max_depth());
+  if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; }
+}
+
+#endif
+
+
+// Like report_error, but for INCORRECT_TYPE / NO_SUCH_FIELD: logged and returned without being stored.
+simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept {
+  SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD);
+  logger::log_error(*this, message);
+  return _error;
+}
+
+
+// Copies max_len bytes of json into tmpbuf (capacity N) and pads the rest with spaces.
+// Returns false, copying nothing, when N is zero or smaller than max_len.
+simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept {
+  // This function is not expected to be called in performance-sensitive settings.
+  // Let us guard against silly cases:
+  if((N < max_len) || (N == 0)) { return false; }
+  // Copy to the buffer.
+  std::memcpy(tmpbuf, json, max_len);
+  if(N > max_len) { // We pad whatever remains with ' '.
+    std::memset(tmpbuf + max_len, ' ', N - max_len);
+  }
+  return true;
+}
+
+} // namespace ondemand
+} // namespace ppc64
+} // namespace simdjson
+
+namespace simdjson {
+
+simdjson_inline simdjson_result<ppc64::ondemand::json_iterator>::simdjson_result(ppc64::ondemand::json_iterator &&value) noexcept
+    : implementation_simdjson_result_base<ppc64::ondemand::json_iterator>(std::forward<ppc64::ondemand::json_iterator>(value)) {}
+simdjson_inline simdjson_result<ppc64::ondemand::json_iterator>::simdjson_result(error_code error) noexcept
+    : implementation_simdjson_result_base<ppc64::ondemand::json_iterator>(error) {}
+
+} // namespace simdjson
+
+#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H
+/* end file simdjson/generic/ondemand/json_iterator-inl.h for ppc64 */
+/* including simdjson/generic/ondemand/json_type-inl.h for ppc64: #include "simdjson/generic/ondemand/json_type-inl.h" */
+/* begin file simdjson/generic/ondemand/json_type-inl.h for ppc64 */
+#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace ppc64 {
+namespace ondemand {
+
+// Streams the lower-case name of the JSON type ("array", "object", ...).
+inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept {
+    switch (type) {
+        case json_type::array: out << "array"; break;
+        case json_type::object: out << "object"; break;
+        case json_type::number: out << "number"; break;
+        case json_type::string: out << "string"; break;
+        case json_type::boolean: out << "boolean"; break;
+        case json_type::null: out << "null"; break;
+        default: SIMDJSON_UNREACHABLE();
+    }
+    return out;
+}
+
+#if SIMDJSON_EXCEPTIONS
+inline std::ostream& operator<<(std::ostream& out, simdjson_result<json_type> &type) noexcept(false) {
+    return out << type.value();
+}
+#endif
+
+
+
+simdjson_inline number_type number::get_number_type() const noexcept {
+  return type;
+}
+
+simdjson_inline bool number::is_uint64() const noexcept {
+  return get_number_type() == number_type::unsigned_integer;
+}
+
+simdjson_inline uint64_t number::get_uint64() const noexcept {
+  return payload.unsigned_integer;
+}
+
+simdjson_inline number::operator uint64_t() const noexcept {
+  return get_uint64();
+}
+
+simdjson_inline bool number::is_int64() const noexcept {
+  return get_number_type() == number_type::signed_integer;
+}
+
+simdjson_inline int64_t number::get_int64() const noexcept {
+  return payload.signed_integer;
+}
+
+simdjson_inline number::operator int64_t() const noexcept {
+  return get_int64();
+}
+
+simdjson_inline bool number::is_double() const noexcept {
+  return get_number_type() == number_type::floating_point_number;
+}
+
+simdjson_inline double number::get_double() const noexcept {
+  return payload.floating_point_number;
+}
+
+simdjson_inline number::operator double() const noexcept {
+  return get_double();
+}
+
+// Converts whichever payload is stored to double (anything not double/int64 is read as unsigned).
+simdjson_inline double number::as_double() const noexcept {
+  if(is_double()) {
+    return payload.floating_point_number;
+  }
+  if(is_int64()) {
+    return double(payload.signed_integer);
+  }
+  return double(payload.unsigned_integer);
+}
+
+simdjson_inline void number::append_s64(int64_t value) noexcept {
+  payload.signed_integer = value;
+  type = number_type::signed_integer;
+}
+
+simdjson_inline void number::append_u64(uint64_t value) noexcept {
+  payload.unsigned_integer = value;
+  type = number_type::unsigned_integer;
+}
+
+simdjson_inline void number::append_double(double value) noexcept {
+  payload.floating_point_number = value;
+  type = number_type::floating_point_number;
+}
+
+// Marks the number as floating point without writing the payload.
+simdjson_inline void number::skip_double() noexcept {
+  type = number_type::floating_point_number;
+}
+
+} // namespace ondemand
+} // namespace ppc64
+} // namespace simdjson
+
+namespace simdjson {
+
+simdjson_inline simdjson_result<ppc64::ondemand::json_type>::simdjson_result(ppc64::ondemand::json_type &&value) noexcept
+    : implementation_simdjson_result_base<ppc64::ondemand::json_type>(std::forward<ppc64::ondemand::json_type>(value)) {}
+simdjson_inline simdjson_result<ppc64::ondemand::json_type>::simdjson_result(error_code error) noexcept
+    : implementation_simdjson_result_base<ppc64::ondemand::json_type>(error) {}
+
+} // namespace simdjson
+
+#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H
+/* end file simdjson/generic/ondemand/json_type-inl.h for ppc64 */
+/* including simdjson/generic/ondemand/logger-inl.h for ppc64: #include "simdjson/generic/ondemand/logger-inl.h" */
+/* begin file simdjson/generic/ondemand/logger-inl.h for ppc64 */
+#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+#include <memory>
+#include <cstring>
+
+namespace simdjson {
+namespace ppc64 {
+namespace ondemand {
+namespace logger {
+
+static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------";
+static constexpr const int LOG_EVENT_LEN = 20;
+static constexpr const int LOG_BUFFER_LEN = 30;
+static constexpr const int LOG_SMALL_BUFFER_LEN = 10;
+static int log_depth = 0; // Not threadsafe. Log only.
+
+// Helper to turn unprintable or newline characters into spaces
+static inline char printable_char(char c) {
+  if (c >= 0x20) {
+    return c;
+  } else {
+    return ' ';
+  }
+}
+
+// printf-style formatting into a std::string; returns an empty string if formatting fails.
+template<typename... Args>
+static inline std::string string_format(const std::string& format, const Args&... args)
+{
+  SIMDJSON_PUSH_DISABLE_ALL_WARNINGS
+  int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1;
+  // Test the signed value: after the cast to size_t a negative result would look like a huge size.
+  if (size_s <= 0) return std::string();
+  auto size = static_cast<size_t>(size_s);
+  std::unique_ptr<char[]> buf(new char[size]);
+  std::snprintf(buf.get(), size, format.c_str(), args...);
+  SIMDJSON_POP_DISABLE_WARNINGS
+  return std::string(buf.get(), buf.get() + size - 1);
+}
+
+// Reads SIMDJSON_LOG_LEVEL: "ERROR" (case-insensitive) selects error level, anything else info.
+static inline log_level get_log_level_from_env()
+{
+  SIMDJSON_PUSH_DISABLE_WARNINGS
+  SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe
+      char *lvl = getenv("SIMDJSON_LOG_LEVEL");
+  SIMDJSON_POP_DISABLE_WARNINGS
+  if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; }
+  return log_level::info;
+}
+
+// The threshold is read from the environment once, on first use.
+static inline log_level log_threshold()
+{
+  static log_level threshold = get_log_level_from_env();
+  return threshold;
+}
+
+static inline bool should_log(log_level level)
+{
+  return level >= log_threshold();
+}
+
+inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept {
+  log_line(iter, "", type, detail, delta, depth_delta, log_level::info);
+}
+
+inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept {
+  log_line(iter, index, depth, "", type, detail, log_level::info);
+}
+inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept {
+  log_line(iter, "", type, detail, delta, depth_delta, log_level::info);
+}
+
+inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept {
+  log_line(iter, index, depth, "+", type, detail, log_level::info);
+  if (LOG_ENABLED) { log_depth++; }
+}
+inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept {
+  log_line(iter, "+", type, "", delta, depth_delta, log_level::info);
+  if (LOG_ENABLED) { log_depth++; }
+}
+
+inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept {
+  if (LOG_ENABLED) { log_depth--; }
+  log_line(iter, "-", type, "", delta, depth_delta, log_level::info);
+}
+
+inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept {
+  log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error);
+}
+inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept {
+  log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error);
+}
+
+inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept {
+  log_event(iter.json_iter(), type, detail, delta, depth_delta);
+}
+
+inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept {
+  log_value(iter.json_iter(), type, detail, delta, depth_delta);
+}
+
+inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept {
+  log_start_value(iter.json_iter(), type, delta, depth_delta);
+}
+
+inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept {
+  log_end_value(iter.json_iter(), type, delta, depth_delta);
+}
+
+inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept {
+  log_error(iter.json_iter(), error, detail, delta, depth_delta);
+}
+
+// Resets log_depth and prints the table header; the explanatory hint is printed only once.
+inline void log_headers() noexcept {
+  if (LOG_ENABLED) {
+    if (simdjson_unlikely(should_log(log_level::info))) {
+      // Technically a static variable is not thread-safe, but if you are using threads and logging... well...
+      static bool displayed_hint{false};
+      log_depth = 0;
+      printf("\n");
+      if (!displayed_hint) {
+        // We only print this helpful header once.
+        printf("# Logging provides the depth and position of the iterator user-visible steps:\n");
+        printf("# +array says 'this is where we were when we discovered the start array'\n");
+        printf(
+            "# -array says 'this is where we were when we ended the array'\n");
+        printf("# skip says 'this is a structural or value I am skipping'\n");
+        printf("# +/-skip says 'this is a start/end array or object I am skipping'\n");
+        printf("#\n");
+        printf("# The indentation of the terms (array, string,...) indicates the depth,\n");
+        printf("# in addition to the depth being displayed.\n");
+        printf("#\n");
+        printf("# Every token in the document has a single depth determined by the tokens before it,\n");
+        printf("# and is not affected by what the token actually is.\n");
+        printf("#\n");
+        printf("# Not all structural elements are presented as tokens in the logs.\n");
+        printf("#\n");
+        printf("# We never give control to the user within an empty array or an empty object.\n");
+        printf("#\n");
+        printf("# Inside an array, having a depth greater than the array's depth means that\n");
+        printf("# we are pointing inside a value.\n");
+        printf("# Having a depth equal to the array means that we are pointing right before a value.\n");
+        printf("# Having a depth smaller than the array means that we have moved beyond the array.\n");
+        displayed_hint = true;
+      }
+      printf("\n");
+      printf("| %-*s ", LOG_EVENT_LEN, "Event");
+      printf("| %-*s ", LOG_BUFFER_LEN, "Buffer");
+      printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next");
+      // printf("| %-*s ", 5, "Next#");
+      printf("| %-*s ", 5, "Depth");
+      printf("| Detail ");
+      printf("|\n");
+
+      printf("|%.*s", LOG_EVENT_LEN + 2, DASHES);
+      printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES);
+      printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES);
+      // printf("|%.*s", 5+2, DASHES);
+      printf("|%.*s", 5 + 2, DASHES);
+      printf("|--------");
+      printf("|\n");
+      fflush(stdout);
+    }
+  }
+}
+
+// Convenience overload: resolves delta / depth_delta against the iterator's current position and depth.
+template <typename... Args>
+inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept {
+  log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward<Args>(args)...);
+}
+
+// Prints one table row: event title (indented by depth), the text at the current and next
+// structural, the depth and the detail string. No-op unless logging is enabled at this level.
+template <typename... Args>
+inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept {
+  if (LOG_ENABLED) {
+    if (simdjson_unlikely(should_log(level))) {
+      const int indent = depth * 2;
+      const auto buf = iter.token.buf;
+      // title doubles as a printf-style format string for the extra args.
+      auto msg = string_format(title, std::forward<Args>(args)...);
+      printf("| %*s%s%-*s ", indent, "", title_prefix,
+             LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str());
+      {
+        // Print the current structural.
+        printf("| ");
+        // Before we begin, the index might point right before the document.
+        // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938
+        if (index < iter._root) {
+          printf("%*s", LOG_BUFFER_LEN, "");
+        } else {
+          auto current_structural = &buf[*index];
+          for (int i = 0; i < LOG_BUFFER_LEN; i++) {
+            printf("%c", printable_char(current_structural[i]));
+          }
+        }
+        printf(" ");
+      }
+      {
+        // Print the next structural.
+        printf("| ");
+        auto next_structural = &buf[*(index + 1)];
+        for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) {
+          printf("%c", printable_char(next_structural[i]));
+        }
+        printf(" ");
+      }
+      // printf("| %5u ", *(index+1));
+      printf("| %5i ", depth);
+      printf("| %6.*s ", int(detail.size()), detail.data());
+      printf("|\n");
+      fflush(stdout);
+    }
+  }
+}
+
+} // namespace logger
+} // namespace ondemand
+} // namespace ppc64
+} // namespace simdjson
+
+#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H
+/* end file simdjson/generic/ondemand/logger-inl.h for ppc64 */
+/* including simdjson/generic/ondemand/object-inl.h for ppc64: #include "simdjson/generic/ondemand/object-inl.h" */
+/* begin file simdjson/generic/ondemand/object-inl.h for ppc64 */
+#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace ppc64 {
+namespace ondemand {
+
+simdjson_inline simdjson_result<value> object::find_field_unordered(const std::string_view key) & noexcept {
+  bool has_value;
+  SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) );
+  if (!has_value) {
+
logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { + return find_field_unordered(key); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { + return std::forward(*this).find_field_unordered(key); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} + +simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_object().error() ); + return object(iter); +} +simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_root_object().error() ); 
+ return object(iter); +} +simdjson_inline error_code object::consume() noexcept { + if(iter.is_at_key()) { + /** + * whenever you are pointing at a key, calling skip_child() is + * unsafe because you will hit a string and you will assume that + * it is string value, and this mistake will lead you to make bad + * depth computation. + */ + /** + * We want to 'consume' the key. We could really + * just do _json_iter->return_current_and_advance(); at this + * point, but, for clarity, we will use the high-level API to + * eat the key. We assume that the compiler optimizes away + * most of the work. + */ + simdjson_unused raw_json_string actual_key; + auto error = iter.field_key().get(actual_key); + if (error) { iter.abandon(); return error; }; + // Let us move to the value while we are at it. + if ((error = iter.field_value())) { iter.abandon(); return error; } + } + auto error_skip = iter.json_iter().skip_child(iter.depth()-1); + if(error_skip) { iter.abandon(); } + return error_skip; +} + +simdjson_inline simdjson_result object::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.started_object().error() ); + return object(iter); +} + +simdjson_inline object object::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline object::object(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result object::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return object_iterator(iter); +} +simdjson_inline simdjson_result object::end() noexcept { + return object_iterator(iter); +} + +inline 
simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = find_field(unescaped); // Take note find_field does not unescape keys when matching + } else { + child = find_field(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; +} + +inline simdjson_result object::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { + return INVALID_JSON_POINTER; + } + return at_pointer(json_pointer); +} + +simdjson_inline simdjson_result object::count_fields() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. 
+ if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the object after counting the number of elements. + iter.reset_object(); + return count; +} + +simdjson_inline simdjson_result object::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_object().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +simdjson_inline simdjson_result object::reset() & noexcept { + return iter.reset_object(); +} + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first)[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return 
error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +inline simdjson_result simdjson_result::reset() noexcept { + if (error()) { return error(); } + return first.reset(); +} + +inline simdjson_result simdjson_result::is_empty() noexcept { + if (error()) { return error(); } + return first.is_empty(); +} + +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +/* end file simdjson/generic/ondemand/object-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/object_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/object_iterator-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ 
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +// +// object_iterator +// + +simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result object_iterator::operator*() noexcept { + error_code error = iter.error(); + if (error) { iter.abandon(); return error; } + auto result = field::start(iter); + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (result.error()) { iter.abandon(); } + return result; +} +simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { + return iter.is_open(); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline object_iterator &object_iterator::operator++() noexcept { + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error + + simdjson_unused error_code error; + if ((error = iter.skip_child() )) { return *this; } + + simdjson_unused bool has_value; + if ((error = iter.has_next_field().get(has_value) )) { return *this; }; + return *this; +} +SIMDJSON_POP_DISABLE_WARNINGS + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter.depth. at_start may vary. 
Error is +// always SUCCESS: +// +// - Start: This is the state when the object is first found and the iterator is just past the {. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the , or } before the next value. In this state, +// depth == iter.depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter.depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the object iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an +// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter.depth == depth, and at_start == false. +// +// Errors that occur while reading a field to give to the user (such as when the key is not a +// string or the field is missing a colon) are yielded immediately. Depth is then decremented, +// moving to the Finished state without transitioning through an Error state at all. +// +// ## Terminal State +// +// The terminal state has iter.depth < depth. at_start is always false. 
+// +// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. +// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. +// + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + ppc64::ondemand::object_iterator &&value +) noexcept + : implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +// If we're iterating and there is an error, return the error once. 
+simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +// Checks for ']' and ',' +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++first; + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/object_iterator-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/parser-inl.h for ppc64: #include "simdjson/generic/ondemand/parser-inl.h" */ +/* begin file simdjson/generic/ondemand/parser-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +simdjson_inline 
parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity} { +} + +simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + if (new_capacity > max_capacity()) { return CAPACITY; } + if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + + // string_capacity copied from document::allocate + _capacity = 0; + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); +#if SIMDJSON_DEVELOPMENT_CHECKS + start_positions.reset(new (std::nothrow) token_position[new_max_depth]); +#endif + if (implementation) { + SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); + SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + } else { + SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); + } + _capacity = new_capacity; + _max_depth = new_max_depth; + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. 
+ SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return document::start({ reinterpret_cast(json.data()), this }); +} + +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; + } + return document::start({ reinterpret_cast(json.data()), this, true }); +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { + return iterate(padded_string_view(json, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { + if(json.capacity() - json.size() < SIMDJSON_PADDING) { + json.reserve(json.size() + SIMDJSON_PADDING); + } + return iterate(padded_string_view(json)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { + return iterate(padded_string_view(json)); +} + +simdjson_warn_unused simdjson_inline simdjson_result 
parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + padded_string_view json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + const padded_string &json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length()) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return json_iterator(reinterpret_cast(json.data()), this); +} + +inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { + buf += 3; + len -= 3; + } + if(allow_comma_separated && batch_size < len) { batch_size = len; } + return document_stream(*this, buf, len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return 
iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} + +simdjson_inline size_t parser::capacity() const noexcept { + return _capacity; +} +simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_inline size_t parser::max_depth() const noexcept { + return _max_depth; +} + +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; + } +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { + uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { + uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H +/* end file simdjson/generic/ondemand/parser-inl.h for ppc64 */ +/* 
including simdjson/generic/ondemand/raw_json_string-inl.h for ppc64: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +namespace ppc64 { +namespace ondemand { + +simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} + +simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } + + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;pos < target.size() && target[pos] != '\\';pos++) {} + // slow path may begin. + bool escaping{false}; + for(;pos < target.size();pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;target[pos] && target[pos] != '\\';pos++) {} + // slow path may begin. 
+ bool escaping{false}; + for(;target[pos];pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { + // If we are going to call memcmp, then we must know something about the length of the raw_json_string. + return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); +} + +simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + if(target.size() <= SIMDJSON_PADDING) { + return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + } + const char * r{raw()}; + size_t pos{0}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. 
+ return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { + // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. 
+ return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { + return a.unsafe_is_equal(c); +} + +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { + return a == c; +} + +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { + return !(a == c); +} + +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { + return !(a == c); +} + + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { + return iter.unescape(*this, allow_replacement); +} + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { + return iter.unescape_wobbly(*this); +} + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { + bool in_escape = false; + const char *s = str.raw(); + while (true) { + switch (*s) { + case '\\': in_escape = !in_escape; break; + case '"': if (in_escape) { in_escape = false; } else { return out; } break; + default: if (in_escape) { in_escape = false; } + } + out << *s; + s++; + } +} + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { + if (error()) { return error(); } + return 
first.raw(); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(ppc64::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { + if (error()) { return error(); } + return first.unescape(iter, allow_replacement); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(ppc64::ondemand::json_iterator &iter) const noexcept { + if (error()) { return error(); } + return first.unescape_wobbly(iter); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H +/* end file simdjson/generic/ondemand/raw_json_string-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/serialization-inl.h for ppc64: #include "simdjson/generic/ondemand/serialization-inl.h" */ +/* begin file simdjson/generic/ondemand/serialization-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +inline std::string_view trim(const std::string_view str) noexcept { + // We can almost surely do better by rolling our own find_first_not_of function. 
+ size_t first = str.find_first_not_of(" \t\n\r"); + // If we have the empty string (just white space), then no trimming is possible, and + // we return the empty string_view. + if (std::string_view::npos == first) { return std::string_view(); } + size_t last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, (last - first + 1)); +} + + +inline simdjson_result to_json_string(ppc64::ondemand::document& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(ppc64::ondemand::document_reference& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(ppc64::ondemand::value& x) noexcept { + /** + * If we somehow receive a value that has already been consumed, + * then the following code could be in trouble. E.g., we create + * an array as needed, but if an array was already created, then + * it could be bad. 
+ */ + using namespace ppc64::ondemand; + ppc64::ondemand::json_type t; + auto error = x.type().get(t); + if(error != SUCCESS) { return error; } + switch (t) + { + case json_type::array: + { + ppc64::ondemand::array array; + error = x.get_array().get(array); + if(error) { return error; } + return to_json_string(array); + } + case json_type::object: + { + ppc64::ondemand::object object; + error = x.get_object().get(object); + if(error) { return error; } + return to_json_string(object); + } + default: + return trim(x.raw_json_token()); + } +} + +inline simdjson_result to_json_string(ppc64::ondemand::object& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(ppc64::ondemand::array& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} +} // namespace simdjson + +namespace simdjson { namespace ppc64 { namespace ondemand { + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else 
{ + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::document_reference& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if 
(x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif +}}} // namespace simdjson::ppc64::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +/* end file simdjson/generic/ondemand/serialization-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/token_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/token_iterator-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): 
#include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +simdjson_inline token_iterator::token_iterator( + const uint8_t *_buf, + token_position position +) noexcept : buf{_buf}, _position{position} +{ +} + +simdjson_inline uint32_t token_iterator::current_offset() const noexcept { + return *(_position); +} + + +simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { + return &buf[*(_position++)]; +} + +simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; +} +simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; +} +simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { + return *(position+1) - *position; +} + +simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { + return *(position+2) - *(position) > *(position+1) - *(position) ? 
+ *(position+1) - *(position) + : *(position+2) - *(position); +} +simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(_position+delta)]; +} +simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(_position+delta); +} +simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + return *(_position+delta+1) - *(_position+delta); +} + +simdjson_inline token_position token_iterator::position() const noexcept { + return _position; +} +simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; +} + +simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return _position == other._position; +} +simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return _position != other._position; +} +simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return _position > other._position; +} +simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return _position >= other._position; +} +simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return _position < other._position; +} +simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return _position <= other._position; +} + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H +/* end file 
simdjson/generic/ondemand/token_iterator-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/value-inl.h for ppc64: #include "simdjson/generic/ondemand/value-inl.h" */ +/* begin file simdjson/generic/ondemand/value-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +simdjson_inline value::value(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} +simdjson_inline value value::start(const value_iterator &iter) noexcept { + return iter; +} +simdjson_inline value value::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline simdjson_result value::get_array() noexcept { + return array::start(iter); +} +simdjson_inline simdjson_result 
value::get_object() noexcept { + return object::start(iter); +} +simdjson_inline simdjson_result value::start_or_resume_object() noexcept { + if (iter.at_start()) { + return get_object(); + } else { + return object::resume(iter); + } +} + +simdjson_inline simdjson_result value::get_raw_json_string() noexcept { + return iter.get_raw_json_string(); +} +simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { + return iter.get_string(allow_replacement); +} +template +simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { + return iter.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result value::get_wobbly_string() noexcept { + return iter.get_wobbly_string(); +} +simdjson_inline simdjson_result value::get_double() noexcept { + return iter.get_double(); +} +simdjson_inline simdjson_result value::get_double_in_string() noexcept { + return iter.get_double_in_string(); +} +simdjson_inline simdjson_result value::get_uint64() noexcept { + return iter.get_uint64(); +} +simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { + return iter.get_uint64_in_string(); +} +simdjson_inline simdjson_result value::get_int64() noexcept { + return iter.get_int64(); +} +simdjson_inline simdjson_result value::get_int64_in_string() noexcept { + return iter.get_int64_in_string(); +} +simdjson_inline simdjson_result value::get_bool() noexcept { + return iter.get_bool(); +} +simdjson_inline simdjson_result value::is_null() noexcept { + return iter.is_null(); +} +template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result 
value::get() noexcept { return get_number(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } + +template simdjson_inline error_code value::get(T &out) noexcept { + return get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline value::operator T() noexcept(false) { + return get(); +} +simdjson_inline value::operator array() noexcept(false) { + return get_array(); +} +simdjson_inline value::operator object() noexcept(false) { + return get_object(); +} +simdjson_inline value::operator uint64_t() noexcept(false) { + return get_uint64(); +} +simdjson_inline value::operator int64_t() noexcept(false) { + return get_int64(); +} +simdjson_inline value::operator double() noexcept(false) { + return get_double(); +} +simdjson_inline value::operator std::string_view() noexcept(false) { + return get_string(false); +} +simdjson_inline value::operator raw_json_string() noexcept(false) { + return get_raw_json_string(); +} +simdjson_inline value::operator bool() noexcept(false) { + return get_bool(); +} +#endif + +simdjson_inline simdjson_result value::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result value::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result value::count_elements() & noexcept { + simdjson_result answer; + auto a = get_array(); + answer = a.count_elements(); + // count_elements leaves you pointing inside the array, at the first element. + // We need to move back so that the user can create a new array (which requires that + // we point at '['). 
+ iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::count_fields() & noexcept { + simdjson_result answer; + auto a = get_object(); + answer = a.count_fields(); + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::at(size_t index) noexcept { + auto a = get_array(); + return a.at(index); +} + +simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result value::find_field(const char *key) noexcept { + return start_or_resume_object().find_field(key); +} + +simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} + +simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result value::operator[](const char *key) noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline simdjson_result value::type() noexcept { + return iter.type(); +} + +simdjson_inline simdjson_result value::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! 
((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline simdjson_result value::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + + +simdjson_inline bool value::is_negative() noexcept { + return iter.is_negative(); +} + +simdjson_inline simdjson_result value::is_integer() noexcept { + return iter.is_integer(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { + return iter.get_number_type(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { + return iter.get_number(); +} + +simdjson_inline std::string_view value::raw_json_token() noexcept { + return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); +} + +simdjson_inline simdjson_result value::raw_json() noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: { + ondemand::array array; + SIMDJSON_TRY(get_array().get(array)); + return array.raw_json(); + } + case json_type::object: { + ondemand::object object; + SIMDJSON_TRY(get_object().get(object)); + return object.raw_json(); + } + default: + return raw_json_token(); + } +} + +simdjson_inline simdjson_result value::current_location() noexcept { + return iter.json_iter().current_location(); +} + +simdjson_inline int32_t value::current_depth() const noexcept{ + return iter.json_iter().depth(); +} + +inline bool is_pointer_well_formed(std::string_view json_pointer) noexcept { + if (simdjson_unlikely(json_pointer.empty())) { // can't be + return false; + } + if (simdjson_unlikely(json_pointer[0] != '/')) { + return false; + } + size_t escape = json_pointer.find('~'); + if (escape == std::string_view::npos) { + return true; + } + if (escape == json_pointer.size() - 1) { + return false; + } + if (json_pointer[escape + 1] != '0' && json_pointer[escape + 1] != '1') { + 
return false; + } + return true; +} + +simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + // a non-empty string can be invalid, or accessing a primitive (issue 2154) + if (is_pointer_well_formed(json_pointer)) { + return NO_SUCH_FIELD; + } + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result value::at_path(std::string_view json_path) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + ppc64::ondemand::value &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + if (error()) { return error(); } + return {}; 
+} + +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} + +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { + if (error()) { return error(); } + return first[key]; +} + +simdjson_inline simdjson_result simdjson_result::get_array() noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return 
first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return first.get(); +} +template simdjson_inline error_code simdjson_result::get(T &out) noexcept { + if (error()) { return error(); } + return first.get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return std::move(first); +} +template<> simdjson_inline error_code simdjson_result::get(ppc64::ondemand::value &out) noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); 
} + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return static_cast(first); +} +simdjson_inline simdjson_result::operator ppc64::ondemand::array() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator ppc64::ondemand::object() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator ppc64::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline 
simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer( + std::string_view json_pointer) noexcept { + if (error()) { + return error(); + } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H +/* end file simdjson/generic/ondemand/value-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/value_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/value_iterator-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +simdjson_inline value_iterator::value_iterator( + json_iterator *json_iter, + depth_t depth, + token_position start_position +) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} +{ +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_root_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { + assert_at_container_start(); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + if (*_json_iter->peek() == '}') { + logger::log_value(*_json_iter, "empty object"); + _json_iter->return_current_and_advance(); + end_container(); + return false; + } + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! 
_json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should + // call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != '}') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); + } + // If the last character is } *and* the first gibberish character is also '}' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed object. 
+ return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { + auto error = check_root_object(); + if(error) { return error; } + return started_object(); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { +#if SIMDJSON_CHECK_EOF + if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } + // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + _json_iter->ascend_to(depth()-1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { + assert_at_next(); + + // It's illegal to call this unless there are more tokens: anything that ends in } or ] is + // obligated to verify there are more tokens if they are not the top level. + switch (*_json_iter->return_current_and_advance()) { + case '}': + logger::log_end_value(*_json_iter, "object"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between object fields"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { + error_code error; + bool has_value; + // + // Initially, the object can be in one of a few different places: + // + // 1. The start of the object, at the first field: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + if (at_first_field()) { + has_value = true; + + // + // 2. 
When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + return false; + + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + if ((error = skip_child() )) { abandon(); return error; } + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + while (has_value) { + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. 
+ if ((error = field_value() )) { abandon(); return error; } + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + //if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); // Skip the value entirely + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + + // If the loop ended, we're out of fields to look at. 
+ return false; +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { + /** + * When find_field_unordered_raw is called, we can either be pointing at the + * first key, pointing outside (at the closing brace) or if a key was matched + * we can be either pointing right afterthe ':' right before the value (that we need skip), + * or we may have consumed the value and we might be at a comma or at the + * final brace (ready for a call to has_next_field()). + */ + error_code error; + bool has_value; + + // First, we scan from that point to the end. + // If we don't find a match, we may loop back around, and scan from the beginning to that point. + token_position search_start = _json_iter->position(); + + // We want to know whether we need to go back to the beginning. + bool at_first = at_first_field(); + /////////////// + // Initially, the object can be in one of a few different places: + // + // 1. At the first key: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + // + if (at_first) { + has_value = true; + + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { + +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + SIMDJSON_TRY(reset_object().get(has_value)); + at_first = true; + // 3. 
When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + // If someone queried a key but they not did access the value, then we are left pointing + // at the ':' and we need to move forward through the value... If the value was + // processed then skip_child() does not move the iterator (but may adjust the depth). + if ((error = skip_child() )) { abandon(); return error; } + search_start = _json_iter->position(); + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + + // After initial processing, we will be in one of two states: + // + // ``` + // // At the beginning of a field + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // At the end of the object + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // ``` + // + // Next, we find a match starting from the current position. + while (has_value) { + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). 
+ if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + // Performance note: it maybe wasteful to rewind to the beginning when there might be + // no other query following. Indeed, it would require reskipping the whole object. + // Instead, you can just stay where you are. If there is a new query, there is always time + // to rewind. + if(at_first) { return false; } + + // If we reach the end without finding a match, search the rest of the fields starting at the + // beginning of the object. 
+ // (We have already run through the object before, so we've already validated its structure. We + // don't check errors in this bit.) + SIMDJSON_TRY(reset_object().get(has_value)); + while (true) { + SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + error = field_value(); SIMDJSON_ASSUME(!error); + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). 
+ SIMDJSON_TRY( skip_child() ); + // If we reached the end of the key-value pair we started from, then we know + // that the key is not there so we return false. We are either right before + // the next comma or the final brace. + if(_json_iter->position() == search_start) { return false; } + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); + // If we make the mistake of exiting here, then we could be left pointing at a key + // in the middle of an object. That's not an allowable state. + } + // If the loop ended, we're out of fields to look at. The program should + // never reach this point. + return false; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { + assert_at_next(); + + const uint8_t *key = _json_iter->return_current_and_advance(); + if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } + return raw_json_string(key); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { + assert_at_next(); + + if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } + _json_iter->descend_to(depth()+1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_root_array(); +} + +inline std::string value_iterator::to_string() const noexcept { + auto answer = 
std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); + if(_json_iter != nullptr) { answer += _json_iter->to_string(); } + answer += std::string(" ]"); + return answer; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { + assert_at_container_start(); + if (*_json_iter->peek() == ']') { + logger::log_value(*_json_iter, "empty array"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY( end_container() ); + return false; + } + _json_iter->descend_to(depth()+1); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should + // also call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != ']') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); + } + // If the last character is ] *and* the first gibberish character is also ']' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. 
+ if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed array. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { + auto error = check_root_array(); + if (error) { return error; } + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { + assert_at_next(); + + logger::log_event(*this, "has_next_element"); + switch (*_json_iter->return_current_and_advance()) { + case ']': + logger::log_end_value(*_json_iter, "array"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + _json_iter->descend_to(depth()+1); + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between array elements"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { + auto not_true = atomparsing::str4ncmp(json, "true"); + auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); + bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 
5 : 4]); + if (error) { return incorrect_type_error("Not a boolean"); } + return simdjson_result(!not_true); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { + bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); + // if we start with 'n', we must be a null + if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } + return is_null_string; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { + return get_raw_json_string().unescape(json_iter(), allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_string(allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { + return get_raw_json_string().unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { + auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { + auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return 
result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { + auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { + auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { + auto result = numberparsing::parse_double(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { + auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { + auto result = parse_bool(peek_non_root_scalar("bool")); + if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } + return result; +} +simdjson_inline simdjson_result value_iterator::is_null() noexcept { + bool is_null_value; + SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); + if(is_null_value) { advance_non_root_scalar("null"); } + return is_null_value; +} +simdjson_inline bool value_iterator::is_negative() noexcept { + return numberparsing::is_negative(peek_non_root_scalar("numbersign")); +} +simdjson_inline bool value_iterator::is_root_negative() noexcept { + return numberparsing::is_negative(peek_root_scalar("numbersign")); +} +simdjson_inline simdjson_result value_iterator::is_integer() noexcept { + return 
numberparsing::is_integer(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { + return numberparsing::get_number_type(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number() noexcept { + number num; + error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); + if(error) { return error; } + return num; +} + +simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("is_root_integer"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + return false; // if there are more than 20 characters, it cannot be represented as an integer. + } + auto answer = numberparsing::is_integer(tmpbuf); + // If the parsing was a success, we must still check that it is + // a single scalar. Note that we parse first because of cases like '[]' where + // getting TRAILING_CONTENT is wrong. + if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } + return answer; +} + +simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return number_type::big_integer; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; + } + auto answer = numberparsing::get_number_type(tmpbuf); + if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + return answer; +} +simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return BIGINT_ERROR; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; + } + number num; + error_code error = numberparsing::parse_number(tmpbuf, num); + if(error) { return error; } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("number"); + return num; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { + return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_root_string(check_trailing, allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { + return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) 
noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("bool"); + uint8_t tmpbuf[5+1+1]; // +1 for null termination + tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } + auto result = parse_bool(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + } + return result; +} +simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("null"); + bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + if(result) { // we have something that looks like a null. + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("null"); + } + return result; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); + + return _json_iter->skip_child(depth()); +} + +simdjson_inline value_iterator value_iterator::child() const noexcept { + assert_at_child(); + return { _json_iter, depth()+1, _json_iter->token.position() }; +} + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is +// marked non-inline. 
+SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline bool value_iterator::is_open() const noexcept { + return _json_iter->depth() >= depth(); +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool value_iterator::at_end() const noexcept { + return _json_iter->at_end(); +} + +simdjson_inline bool value_iterator::at_start() const noexcept { + return _json_iter->token.position() == start_position(); +} + +simdjson_inline bool value_iterator::at_first_field() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + return _json_iter->token.position() == start_position() + 1; +} + +simdjson_inline void value_iterator::abandon() noexcept { + _json_iter->abandon(); +} + +simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { + return _depth; +} +simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { + return _json_iter->error; +} +simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { + return _json_iter->string_buf_loc(); +} +simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { + return *_json_iter; +} +simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { + return *_json_iter; +} + +simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { + return _json_iter->peek(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { + return _json_iter->peek_length(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { + return _json_iter->peek_root_length(start_position()); +} + +simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. 
+ if (!is_at_start()) { return peek_start(); } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return; } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { + logger::log_start_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + const uint8_t *json; + if (!is_at_start()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + json = peek_start(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + } else { + assert_at_start(); + /** + * We should be prudent. Let us peek. If it is not the right type, we + * return an error. Only once we have determined that we have the right + * type are we allowed to advance! 
+ */ + json = _json_iter->peek(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + _json_iter->return_current_and_advance(); + } + + + return SUCCESS; +} + + +simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_root(); + return _json_iter->peek(); +} +simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_non_root_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_root(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} +simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_non_root_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { + logger::log_error(*_json_iter, start_position(), depth(), message); + return INCORRECT_TYPE; +} + +simdjson_inline bool value_iterator::is_at_start() const noexcept { + return position() == start_position(); +} + +simdjson_inline bool value_iterator::is_at_key() const noexcept { + // Keys are at the same depth as the object. + // Note here that we could be safer and check that we are within an object, + // but we do not. 
+ return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; +} + +simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { + // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). + auto delta = position() - start_position(); + return delta == 1 || delta == 2; +} + +inline void value_iterator::assert_at_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_container_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_next() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +simdjson_inline void value_iterator::move_at_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position); +} + +simdjson_inline void value_iterator::move_at_container_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position + 1); +} + +simdjson_inline simdjson_result value_iterator::reset_array() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_array(); +} + +simdjson_inline simdjson_result value_iterator::reset_object() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_object(); +} + +inline void value_iterator::assert_at_child() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_root() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( 
_depth == 1 ); +} + +inline void value_iterator::assert_at_non_root_start() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth > 1 ); +} + +inline void value_iterator::assert_is_valid() const noexcept { + SIMDJSON_ASSUME( _json_iter != nullptr ); +} + +simdjson_inline bool value_iterator::is_valid() const noexcept { + return _json_iter != nullptr; +} + +simdjson_inline simdjson_result value_iterator::type() const noexcept { + switch (*peek_start()) { + case '{': + return json_type::object; + case '[': + return json_type::array; + case '"': + return json_type::string; + case 'n': + return json_type::null; + case 't': case 'f': + return json_type::boolean; + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return json_type::number; + default: + return TAPE_ERROR; + } +} + +simdjson_inline token_position value_iterator::start_position() const noexcept { + return _start_position; +} + +simdjson_inline token_position value_iterator::position() const noexcept { + return _json_iter->position(); +} + +simdjson_inline token_position value_iterator::end_position() const noexcept { + return _json_iter->end_position(); +} + +simdjson_inline token_position value_iterator::last_position() const noexcept { + return _json_iter->last_position(); +} + +simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { + return _json_iter->report_error(error, message); +} + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +/* end file 
simdjson/generic/ondemand/value_iterator-inl.h for ppc64 */ +/* end file simdjson/generic/ondemand/amalgamated.h for ppc64 */ +/* including simdjson/ppc64/end.h: #include "simdjson/ppc64/end.h" */ +/* begin file simdjson/ppc64/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "ppc64" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/ppc64/end.h */ + +#endif // SIMDJSON_PPC64_ONDEMAND_H +/* end file simdjson/ppc64/ondemand.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(westmere) +/* including simdjson/westmere/ondemand.h: #include "simdjson/westmere/ondemand.h" */ +/* begin file simdjson/westmere/ondemand.h */ +#ifndef SIMDJSON_WESTMERE_ONDEMAND_H +#define SIMDJSON_WESTMERE_ONDEMAND_H + +/* including simdjson/westmere/begin.h: #include "simdjson/westmere/begin.h" */ +/* begin file simdjson/westmere/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "westmere" */ +#define SIMDJSON_IMPLEMENTATION westmere +/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ +/* begin file simdjson/westmere/base.h */ +#ifndef SIMDJSON_WESTMERE_BASE_H +#define SIMDJSON_WESTMERE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +/** + * Implementation for Westmere (Intel SSE4.2). 
+ */ +namespace westmere { + +class implementation; + +namespace { +namespace simd { + +template struct simd8; +template struct simd8x64; + +} // namespace simd +} // unnamed namespace + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BASE_H +/* end file simdjson/westmere/base.h */ +/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ +/* begin file simdjson/westmere/intrinsics.h */ +#ifndef SIMDJSON_WESTMERE_INTRINSICS_H +#define SIMDJSON_WESTMERE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. 
However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + */ +#include // for _mm_alignr_epi8 +#include // for _mm_clmulepi64_si128 +#endif + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); + +#endif // SIMDJSON_WESTMERE_INTRINSICS_H +/* end file simdjson/westmere/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +SIMDJSON_TARGET_REGION("sse4.2,pclmul,popcnt") +#endif + +/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ +/* begin file simdjson/westmere/bitmanipulation.h */ +#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H +#define SIMDJSON_WESTMERE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). 
+ _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H +/* end file simdjson/westmere/bitmanipulation.h */ +/* including simdjson/westmere/bitmask.h: #include "simdjson/westmere/bitmask.h" */ +/* begin file simdjson/westmere/bitmask.h */ +#ifndef SIMDJSON_WESTMERE_BITMASK_H +#define SIMDJSON_WESTMERE_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): 
#include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processing supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMASK_H +/* end file simdjson/westmere/bitmask.h */ +/* including simdjson/westmere/numberparsing_defs.h: #include "simdjson/westmere/numberparsing_defs.h" */ +/* begin file simdjson/westmere/numberparsing_defs.h */ +#ifndef SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H +#define SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H + +/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ +/* begin file simdjson/westmere/base.h */ +#ifndef SIMDJSON_WESTMERE_BASE_H +#define SIMDJSON_WESTMERE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +/** + * Implementation for Westmere (Intel SSE4.2). 
+ */ +namespace westmere { + +class implementation; + +namespace { +namespace simd { + +template struct simd8; +template struct simd8x64; + +} // namespace simd +} // unnamed namespace + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BASE_H +/* end file simdjson/westmere/base.h */ +/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ +/* begin file simdjson/westmere/intrinsics.h */ +#ifndef SIMDJSON_WESTMERE_INTRINSICS_H +#define SIMDJSON_WESTMERE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. 
However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + */ +#include // for _mm_alignr_epi8 +#include // for _mm_clmulepi64_si128 +#endif + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); + +#endif // SIMDJSON_WESTMERE_INTRINSICS_H +/* end file simdjson/westmere/intrinsics.h */ + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace numberparsing { + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. + const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = 
__umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace westmere +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H +/* end file simdjson/westmere/numberparsing_defs.h */ +/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ +/* begin file simdjson/westmere/simd.h */ +#ifndef SIMDJSON_WESTMERE_SIMD_H +#define SIMDJSON_WESTMERE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace simd { + + template + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } + simdjson_inline Child operator^(const Child other) const { return 
_mm_xor_si128(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + template> + struct base8: base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } + static simdjson_inline simd8 load(const T values[16]) { + return 
_mm_loadu_si128(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. 
+ template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. 
+ __m128i compactmask = + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const 
simd8 other) const { return _mm_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } + // Same as >, but only guarantees true is nonzero (> guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8
other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } // a < b iff (b - a) saturates to a nonzero byte, i.e. lt_bits; mirrors operator> + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g.
value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); + uint64_t r1 = this->chunks[1].to_bitmask() ; + uint64_t r2 = this->chunks[2].to_bitmask() ; + uint64_t r3 = this->chunks[3].to_bitmask() ; + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline 
uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H +/* end file simdjson/westmere/simd.h */ +/* including simdjson/westmere/stringparsing_defs.h: #include "simdjson/westmere/stringparsing_defs.h" */ +/* begin file simdjson/westmere/stringparsing_defs.h */ +#ifndef SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H +#define SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H + +/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ +/* begin file simdjson/westmere/bitmanipulation.h */ +#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H +#define SIMDJSON_WESTMERE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. 
+SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). 
+ if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H +/* end file simdjson/westmere/bitmanipulation.h */ +/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ +/* begin file simdjson/westmere/simd.h */ +#ifndef SIMDJSON_WESTMERE_SIMD_H +#define SIMDJSON_WESTMERE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace simd { + + template + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator 
const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + template> + struct base8: base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + simdjson_inline bool any() const { return 
!_mm_testz_si128(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } + static simdjson_inline simd8 load(const T values[16]) { + return _mm_loadu_si128(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Perform a lookup assuming the value is between 0 and 15, i.e. a valid index into the 16-entry table (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output' all bytes corresponding to a 0 in the mask (interpreted as a bitset).
+ // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. 
+ __m128i compactmask = + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const 
simd8 other) const { return _mm_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } + // Same as >, but only guarantees true is nonzero (> guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8
other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } // a < b iff (b - a) saturates to a nonzero byte, i.e. lt_bits; mirrors operator> + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g.
value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); + uint64_t r1 = this->chunks[1].to_bitmask() ; + uint64_t r2 = this->chunks[2].to_bitmask() ; + uint64_t r3 = this->chunks[3].to_bitmask() ; + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline 
uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H +/* end file simdjson/westmere/simd.h */ + +namespace simdjson { +namespace westmere { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + 16); + v0.store(dst); + v1.store(dst + 16); + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H +/* end file simdjson/westmere/stringparsing_defs.h */ +/* end file simdjson/westmere/begin.h */ +/* including simdjson/generic/ondemand/amalgamated.h for westmere: #include "simdjson/generic/ondemand/amalgamated.h" */ +/* begin file simdjson/generic/ondemand/amalgamated.h for westmere */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H) +#error simdjson/generic/ondemand/dependencies.h must be included before simdjson/generic/ondemand/amalgamated.h! 
+#endif + +// Stuff other things depend on +/* including simdjson/generic/ondemand/base.h for westmere: #include "simdjson/generic/ondemand/base.h" */ +/* begin file simdjson/generic/ondemand/base.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +/** + * A fast, simple, DOM-like interface that parses JSON as you use it. + * + * Designed for maximum speed and a lower memory profile. + */ +namespace ondemand { + +/** Represents the depth of a JSON value (number of nested arrays/objects). */ +using depth_t = int32_t; + +/** @copydoc simdjson::westmere::number_type */ +using number_type = simdjson::westmere::number_type; + +/** @private Position in the JSON buffer indexes */ +using token_position = const uint32_t *; + +class array; +class array_iterator; +class document; +class document_reference; +class document_stream; +class field; +class json_iterator; +enum class json_type; +struct number; +class object; +class object_iterator; +class parser; +class raw_json_string; +class token_iterator; +class value; +class value_iterator; + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_BASE_H +/* end file simdjson/generic/ondemand/base.h for westmere */ +/* including simdjson/generic/ondemand/value_iterator.h for westmere: #include "simdjson/generic/ondemand/value_iterator.h" */ +/* begin file simdjson/generic/ondemand/value_iterator.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define 
SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +/** + * Iterates through a single JSON value at a particular depth. + * + * Does not keep track of the type of value: provides methods for objects, arrays and scalars and expects + * the caller to call the right ones. + * + * @private This is not intended for external use. + */ +class value_iterator { +protected: + /** The underlying JSON iterator */ + json_iterator *_json_iter{}; + /** The depth of this value */ + depth_t _depth{}; + /** + * The starting token index for this value + */ + token_position _start_position{}; + +public: + simdjson_inline value_iterator() noexcept = default; + + /** + * Denote that we're starting a document. + */ + simdjson_inline void start_document() noexcept; + + /** + * Skips a non-iterated or partially-iterated JSON value, whether it is a scalar, array or object. + * + * Optimized for scalars. + */ + simdjson_warn_unused simdjson_inline error_code skip_child() noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is at the start of the value + */ + simdjson_inline bool at_start() const noexcept; + + /** + * Tell whether the value is open--if the value has not been used, or the array/object is still open. + */ + simdjson_inline bool is_open() const noexcept; + + /** + * Tell whether the value is at an object's first field (just after the {). + */ + simdjson_inline bool at_first_field() const noexcept; + + /** + * Abandon all iteration. + */ + simdjson_inline void abandon() noexcept; + + /** + * Get the child value as a value_iterator. 
+ */ + simdjson_inline value_iterator child_value() const noexcept; + + /** + * Get the depth of this value. + */ + simdjson_inline int32_t depth() const noexcept; + + /** + * Get the JSON type of this value. + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() const noexcept; + + /** + * @addtogroup object Object iteration + * + * Methods to iterate and find object fields. These methods generally *assume* the value is + * actually an object; the caller is responsible for keeping track of that fact. + * + * @{ + */ + + /** + * Start an object iteration. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + */ + simdjson_warn_unused simdjson_inline simdjson_result start_object() noexcept; + /** + * Start an object iteration from the root. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_object() noexcept; + /** + * Checks whether an object could be started from the root. May be called by start_root_object. + * + * @returns SUCCESS if it is possible to safely start an object from the root (document level). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_object() noexcept; + /** + * Start an object iteration after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). 
+ */ + simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; + /** + * Start an object iteration from the root, after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; + + /** + * Moves to the next field in an object. + * + * Looks for , and }. If } is found, the object is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return whether there is another field in the object. + * @error TAPE_ERROR If there is a comma missing between fields. + * @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_field() noexcept; + + /** + * Get the current field's key. + */ + simdjson_warn_unused simdjson_inline simdjson_result field_key() noexcept; + + /** + * Pass the : in the field and move to its value. + */ + simdjson_warn_unused simdjson_inline error_code field_value() noexcept; + + /** + * Find the next field with the given key. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). 
+ */ + simdjson_warn_unused simdjson_inline error_code find_field(const std::string_view key) noexcept; + + /** + * Find the next field with the given key, *without* unescaping. This assumes object order: it + * will not find the field if it was already passed when looking for some *other* field. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_raw(const std::string_view key) noexcept; + + /** + * Find the field with the given key without regard to order, and *without* unescaping. + * + * This is an unordered object lookup: if the field is not found initially, it will cycle around and scan from the beginning. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept; + + /** @} */ + + /** + * @addtogroup array Array iteration + * Methods to iterate over array elements. These methods generally *assume* the value is actually + * an array; the caller is responsible for keeping track of that fact. + * @{ + */ + + /** + * Check for an opening [ and start an array iteration.
+ * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + */ + simdjson_warn_unused simdjson_inline simdjson_result start_array() noexcept; + /** + * Check for an opening [ and start an array iteration while at the root. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_array() noexcept; + /** + * Checks whether an array could be started from the root. May be called by start_root_array. + * + * @returns SUCCESS if it is possible to safely start an array from the root (document level). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_array() noexcept; + /** + * Start an array iteration, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; + /** + * Start an array iteration from the root, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; + + /** + * Moves to the next element in an array. + * + * Looks for , and ]. 
If ] is found, the array is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return Whether there is another element in the array. + * @error TAPE_ERROR If there is a comma missing between elements. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_element() noexcept; + + /** + * Get a child value iterator. + */ + simdjson_warn_unused simdjson_inline value_iterator child() const noexcept; + + /** @} */ + + /** + * @defgroup scalar Scalar values + * @addtogroup scalar + * @{ + */ + + simdjson_warn_unused simdjson_inline simdjson_result get_string(bool allow_replacement) noexcept; + template + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_bool() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_null() noexcept; + simdjson_warn_unused simdjson_inline bool is_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_integer() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + simdjson_warn_unused simdjson_inline simdjson_result get_root_string(bool 
check_trailing, bool allow_replacement) noexcept; + template + simdjson_warn_unused simdjson_inline error_code get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_wobbly_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_raw_json_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_bool(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline bool is_root_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_integer(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number_type(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; + + simdjson_inline error_code error() const noexcept; + simdjson_inline uint8_t *&string_buf_loc() noexcept; + simdjson_inline const json_iterator &json_iter() const noexcept; + simdjson_inline json_iterator &json_iter() noexcept; + + simdjson_inline void assert_is_valid() const noexcept; + simdjson_inline bool is_valid() const noexcept; + + /** @} 
*/ +protected: + /** + * Restarts an array iteration. + * @returns Whether the array has any elements (returns false for empty). + */ + simdjson_inline simdjson_result reset_array() noexcept; + /** + * Restarts an object iteration. + * @returns Whether the object has any fields (returns false for empty). + */ + simdjson_inline simdjson_result reset_object() noexcept; + /** + * move_at_start(): moves us so that we are pointing at the beginning of + * the container. It updates the index so that at_start() is true and it + * syncs the depth. The user can then create a new container instance. + * + * Usage: used with value::count_elements(). + **/ + simdjson_inline void move_at_start() noexcept; + + /** + * move_at_container_start(): moves us so that we are pointing at the beginning of + * the container so that assert_at_container_start() passes. + * + * Usage: used with reset_array() and reset_object(). + **/ + simdjson_inline void move_at_container_start() noexcept; + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + simdjson_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept; + + simdjson_inline simdjson_result parse_null(const uint8_t *json) const noexcept; + simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; + simdjson_inline const uint8_t *peek_start() const noexcept; + simdjson_inline uint32_t peek_start_length() const noexcept; + simdjson_inline uint32_t peek_root_length() const noexcept; + + /** + * The general idea of the advance_... methods and the peek_* methods + * is that you first peek and check that you have desired type. If you do, + * and only if you do, then you advance. + * + * We used to unconditionally advance. But this made reasoning about our + * current state difficult. + * Suppose you always advance. Look at the 'value' matching the key + * "shadowable" in the following example... 
+ * + * ({"globals":{"a":{"shadowable":[}}}}) + * + * If the user thinks it is a Boolean and asks for it, then we check the '[', + * decide it is not a Boolean, but still move into the next character ('}'). Now + * we are left pointing at '}' right after a '['. And we have not yet reported + * an error, only that we do not have a Boolean. + * + * If, instead, you just stand your ground until it is content that you know, then + * you will only even move beyond the '[' if the user tells you that you have an + * array. So you will be at the '}' character inside the array and, hopefully, you + * will then catch the error because an array cannot start with '}', but the code + * processing Boolean values does not know this. + * + * So the contract is: first call 'peek_...' and then call 'advance_...' only + * if you have determined that it is a type you can handle. + * + * Unfortunately, it makes the code more verbose, longer and maybe more error prone. + */ + + simdjson_inline void advance_scalar(const char *type) noexcept; + simdjson_inline void advance_root_scalar(const char *type) noexcept; + simdjson_inline void advance_non_root_scalar(const char *type) noexcept; + + simdjson_inline const uint8_t *peek_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_root_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; + + + simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; + simdjson_inline error_code end_container() noexcept; + + /** + * Advance to a place expecting a value (increasing depth). + * + * @return The current token (the one left behind). + * @error TAPE_ERROR If the document ended early. 
+ */ + simdjson_inline simdjson_result advance_to_value() noexcept; + + simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; + simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; + + simdjson_inline bool is_at_start() const noexcept; + /** + * is_at_iterator_start() returns true on an array or object after it has just been + * created, whether the instance is empty or not. + * + * Usage: used by array::begin() in debug mode (SIMDJSON_DEVELOPMENT_CHECKS) + */ + simdjson_inline bool is_at_iterator_start() const noexcept; + + /** + * Assuming that we are within an object, this returns true if we + * are pointing at a key. + * + * Usage: the skip_child() method should never be used while we are pointing + * at a key inside an object. + */ + simdjson_inline bool is_at_key() const noexcept; + + inline void assert_at_start() const noexcept; + inline void assert_at_container_start() const noexcept; + inline void assert_at_root() const noexcept; + inline void assert_at_child() const noexcept; + inline void assert_at_next() const noexcept; + inline void assert_at_non_root_start() const noexcept; + + /** Get the starting position of this value */ + simdjson_inline token_position start_position() const noexcept; + + /** @copydoc error_code json_iterator::position() const noexcept; */ + simdjson_inline token_position position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position last_position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position end_position() const noexcept; + /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + friend class document; + friend class object; + friend class array; + friend class value; + friend 
class field; +}; // value_iterator + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(westmere::ondemand::value_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H +/* end file simdjson/generic/ondemand/value_iterator.h for westmere */ +/* including simdjson/generic/ondemand/value.h for westmere: #include "simdjson/generic/ondemand/value.h" */ +/* begin file simdjson/generic/ondemand/value.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +/** + * An ephemeral JSON value returned during iteration. It is only valid for as long as you do + * not access more data in the JSON document. + */ +class value { +public: + /** + * Create a new invalid value. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline value() noexcept = default; + + /** + * Get this value as the given type. 
+ * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_inline simdjson_result get() noexcept { + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_inline error_code get(T &out) noexcept; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() noexcept; + + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields.
+ * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() noexcept; + + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + + /** + * Cast this JSON value (inside string) to a unsigned integer. + * + * @returns A unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64() noexcept; + + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. 
+ * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + + /** + * Attempts to fill the provided std::string reference with the parsed value of the current string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. + * We recommend you avoid allocating an std::string unless you need to. + * + * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. + */ + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + + /** + * Cast this JSON value to a "wobbly" string. + * + * The string may not be a valid UTF-8 string. + * See https://simonsapin.github.io/wtf-8/ + * + * Important: a value should be consumed once. Calling get_wobbly_string() twice on the same value + * is an error. + * + * @returns A string, possibly not valid UTF-8. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false.
+ */ + simdjson_inline simdjson_result get_bool() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null. + * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.). + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() noexcept(false); + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer.
+ */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); +#endif + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + * + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. 
After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * Performance hint: You should only call count_elements() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method on the object instance. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; + /** + * Look up a field by name on an object (order-sensitive). 
+ * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. 
+ * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field as not there when they are not in order). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @return The type of JSON value (json_type::array, json_type::object, json_type::string, + * json_type::number, json_type::boolean, or json_type::null). + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the value is a scalar (string, number, null, Boolean). + * Returns false when there it is an array or object. 
+ * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + /** + * Checks whether the value is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; + + /** + * Checks whether the value is a negative number. + * + * @returns true if the number is negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the value is an integer number. Note that + * this requires partially parsing the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * Performance note: if you call this function systematically + * before parsing a number, you may have fallen for a performance + * anti-pattern. + * + * @returns true if the value is an integer. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808. + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808.
+ * get_number_type() is number_type::big_integer for integers that do not fit in 64 bits, + * in which case the digit_count is set to the length of the big integer string. + * Otherwise, get_number_type() has value number_type::floating_point_number. + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it is does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number. An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * For integers that do not fit in 64 bits, the function returns BIGINT_ERROR error code. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + * + * Performance note: this is designed with performance in mind. 
When + * calling 'get_number()', you scan the number string only once, determining + * efficiently the type and storing it in an efficient manner. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. However, if this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view is guaranteed to be + * a non-space token. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". + * - -1.2e-100 + * - true + * - false + * - null + * + * See also value::raw_json(). + */ + simdjson_inline std::string_view raw_json_token() noexcept; + + /** + * Get a string_view pointing at this value in the JSON document. + * If this element is an array or an object, it consumes the array or the object + * and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document. + * If this element is a scalar (string, number, Boolean, null), it returns what + * raw_json_token() would return. + */ + simdjson_inline simdjson_result raw_json() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + simdjson_inline simdjson_result current_location() noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. 
+ */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. + * + * Calling at_pointer() on non-document instances (e.g., arrays and objects) is not + * standardized (by RFC 6901). We provide some experimental support for JSON pointers + * on non-document instances. Yet it is not the case when calling at_pointer on an array + * or an object instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. 
+ * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view at_path) noexcept; + + +protected: + /** + * Create a value. + */ + simdjson_inline value(const value_iterator &iter) noexcept; + + /** + * Skip this value, allowing iteration to continue. + */ + simdjson_inline void skip() noexcept; + + /** + * Start a value at the current position. + * + * (It should already be started; this is just a self-documentation method.) + */ + static simdjson_inline value start(const value_iterator &iter) noexcept; + + /** + * Resume a value. 
+ */ + static simdjson_inline value resume(const value_iterator &iter) noexcept; + + /** + * Get the object, starting or resuming it as necessary + */ + simdjson_inline simdjson_result start_or_resume_object() noexcept; + + // simdjson_inline void log_value(const char *type) const noexcept; + // simdjson_inline void log_error(const char *message) const noexcept; + + value_iterator iter{}; + + friend class document; + friend class array_iterator; + friend class field; + friend class object; + friend struct simdjson_result; + friend struct simdjson_result; + friend class field; +}; + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { +/** Result wrapper built from either a westmere::ondemand::value or an error_code (see the two constructors below). It redeclares accessors under the same names as westmere::ondemand::value; NOTE(review): presumably each forwards to the wrapped value or returns the stored error - the definitions are not in this chunk. */ +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(westmere::ondemand::value &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result get_array() noexcept; + simdjson_inline simdjson_result get_object() noexcept; + + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get()
noexcept; + + template simdjson_inline error_code get(T &out) noexcept; +/* The conversion operators in the conditional block below are declared only when SIMDJSON_EXCEPTIONS is enabled; each is noexcept(false). */ +#if SIMDJSON_EXCEPTIONS + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator westmere::ondemand::array() noexcept(false); + simdjson_inline operator westmere::ondemand::object() noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator westmere::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object.
+ */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field was not there when they are not in order). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object.
+ */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + */ + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + + /** @copydoc simdjson_inline std::string_view value::raw_json_token() noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + + /** @copydoc simdjson_inline simdjson_result current_location() noexcept */ + simdjson_inline simdjson_result current_location() noexcept; + /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + +} // namespace simdjson + +#endif
// SIMDJSON_GENERIC_ONDEMAND_VALUE_H +/* end file simdjson/generic/ondemand/value.h for westmere */ +/* including simdjson/generic/ondemand/logger.h for westmere: #include "simdjson/generic/ondemand/logger.h" */ +/* begin file simdjson/generic/ondemand/logger.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +// Logging should be free unless SIMDJSON_VERBOSE_LOGGING is set. Importantly, it is critical +// that the call to the log functions be side-effect free. Thus, for example, you should not +// create temporary std::string instances. +namespace logger { +/** Severity tag for a log line (info = 0, error = 1); it is the type of the `level` parameter of log_line(). */ +enum class log_level : int32_t { + info = 0, + error = 1 +}; +/* LOG_ENABLED is a compile-time flag: true in the SIMDJSON_VERBOSE_LOGGING branch, false otherwise. */ +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + +// We do not want these functions to be 'really inlined' since real inlining is +// for performance purposes and if you are using the loggers, you do not care about +// performance (or should not). +static inline void log_headers() noexcept; +// If args are provided, title will be treated as format string +template +static inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +template +static inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... 
args) noexcept; +static inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; + +static inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail="") noexcept; +static inline void log_error(const json_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; +/* Overloads taking a value_iterator; they repeat the default arguments of the relative json_iterator forms above. */ +static inline void log_event(const value_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const value_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_error(const value_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +} // namespace logger +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_H +/* end file
simdjson/generic/ondemand/logger.h for westmere */ +/* including simdjson/generic/ondemand/token_iterator.h for westmere: #include "simdjson/generic/ondemand/token_iterator.h" */ +/* begin file simdjson/generic/ondemand/token_iterator.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +/** + * Iterates through JSON tokens (`{` `}` `[` `]` `,` `:` `""` `123` `true` `false` `null`) + * detected by stage 1. + * + * @private This is not intended for external use. + */ +class token_iterator { +public: + /** + * Create a new invalid token_iterator. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline token_iterator() noexcept = default; + simdjson_inline token_iterator(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator &operator=(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator(const token_iterator &other) noexcept = default; + simdjson_inline token_iterator &operator=(const token_iterator &other) noexcept = default; + + /** + * Advance to the next token (returning the current one). + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + /** + * Reports the current offset in bytes from the start of the underlying buffer.
+ */ + simdjson_inline uint32_t current_offset() const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. + * + */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a root token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token (start of the document). + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; + /** + * Return the current index. + */ + simdjson_inline token_position position() const noexcept; + /** + * Reset to a previously saved index.
+ */ + simdjson_inline void set_position(token_position target_position) noexcept; + + // NOTE: we don't support a full C++ iterator interface, because we expect people to make + // different calls to advance the iterator based on *their own* state. + /* Comparison operators against another token_iterator. NOTE(review): presumably they compare _position; the definitions are not in this chunk. */ + simdjson_inline bool operator==(const token_iterator &other) const noexcept; + simdjson_inline bool operator!=(const token_iterator &other) const noexcept; + simdjson_inline bool operator>(const token_iterator &other) const noexcept; + simdjson_inline bool operator>=(const token_iterator &other) const noexcept; + simdjson_inline bool operator<(const token_iterator &other) const noexcept; + simdjson_inline bool operator<=(const token_iterator &other) const noexcept; + +protected: + simdjson_inline token_iterator(const uint8_t *buf, token_position position) noexcept; + + /** + * Get the index of the JSON text for a given token (relative). + * + * Unlike peek(), this returns a uint32_t index rather than a pointer (presumably an offset into buf; confirm against the definition). + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_index(int32_t delta=0) const noexcept; + /** + * Get the index of the JSON text for a given token. + * + * Unlike peek(), this returns a uint32_t index rather than a pointer (presumably an offset into buf; confirm against the definition). + * + * @param position The position of the token. + * + */ + simdjson_inline uint32_t peek_index(token_position position) const noexcept; + /* State: buf and _position are brace-initialized to their defaults, so a default-constructed token_iterator (the 'invalid' iterator described on the default constructor) starts from those values. */ + const uint8_t *buf{}; + token_position _position{}; + + friend class json_iterator; + friend class value_iterator; + friend class object; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... 
args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +}; + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { +/** Result wrapper constructible from a westmere::ondemand::token_iterator or an error_code; it declares only constructors and a defaulted destructor. */ +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(westmere::ondemand::token_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H +/* end file simdjson/generic/ondemand/token_iterator.h for westmere */ +/* including simdjson/generic/ondemand/json_iterator.h for westmere: #include "simdjson/generic/ondemand/json_iterator.h" */ +/* begin file simdjson/generic/ondemand/json_iterator.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +/** + * Iterates through JSON tokens, keeping track of depth and string buffer. + * + * @private This is not intended for external use.
+ */ +class json_iterator { +protected: + token_iterator token{}; + ondemand::parser *parser{}; + /** + * Next free location in the string buffer. + * + * Used by raw_json_string::unescape() to have a place to unescape strings to. + */ + uint8_t *_string_buf_loc{}; + /** + * JSON error, if there is one. + * + * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. + * + * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first + * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If + * this is not elided, we should make sure it's at least not using up a register. Failing that, + * we should store it in document so there's only one of them. + */ + error_code error{SUCCESS}; + /** + * Depth of the current token in the JSON. + * + * - 0 = finished with document + * - 1 = document root value (could be [ or {, not yet known) + * - 2 = , or } inside root array/object + * - 3 = key or value inside root array/object. + */ + depth_t _depth{}; + /** + * Beginning of the document indexes. + * Normally we have root == parser->implementation->structural_indexes.get() + * but this may differ, especially in streaming mode (where we have several + * documents); + */ + token_position _root{}; + /** + * Normally, a json_iterator operates over a single document, but in + * some cases, we may have a stream of documents. This attribute is meant + * as meta-data: the json_iterator works the same irrespective of the + * value of this attribute. 
+ */ + bool _streaming{false}; + +public: + simdjson_inline json_iterator() noexcept = default; + simdjson_inline json_iterator(json_iterator &&other) noexcept; + simdjson_inline json_iterator &operator=(json_iterator &&other) noexcept; + simdjson_inline explicit json_iterator(const json_iterator &other) noexcept = default; + simdjson_inline json_iterator &operator=(const json_iterator &other) noexcept = default; + /** + * Skips a JSON value, whether it is a scalar, array or object. + */ + simdjson_warn_unused simdjson_inline error_code skip_child(depth_t parent_depth) noexcept; + + /** + * Tell whether the iterator is still at the start + */ + simdjson_inline bool at_root() const noexcept; + + /** + * Tell whether we should be expected to run in streaming + * mode (iterating over many documents). It is pure metadata + * that does not affect how the iterator works. It is used by + * start_root_array() and start_root_object(). + */ + simdjson_inline bool streaming() const noexcept; + + /** + * Get the root value iterator + */ + simdjson_inline token_position root_position() const noexcept; + /** + * Assert that we are at the document depth (== 1) + */ + simdjson_inline void assert_at_document_depth() const noexcept; + /** + * Assert that we are at the root of the document + */ + simdjson_inline void assert_at_root() const noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is live (has not been moved). + */ + simdjson_inline bool is_alive() const noexcept; + + /** + * Abandon this iterator, setting depth to 0 (as if the document is finished). + */ + simdjson_inline void abandon() noexcept; + + /** + * Advance the current token without modifying depth. + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + + /** + * Returns true if there is a single token in the index (i.e., it is + * a JSON with a scalar value such as a single number). 
+ * + * @return whether there is a single token + */ + simdjson_inline bool is_single_token() const noexcept; + + /** + * Assert that there are at least the given number of tokens left. + * + * Has no effect in release builds. + */ + simdjson_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; + /** + * Assert that the given position addresses an actual token (is within bounds). + * + * Has no effect in release builds. + */ + simdjson_inline void assert_valid_position(token_position position) const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + /** + * Get a pointer to the current location in the input buffer. + * + * This is not null-terminated; it is a view into the JSON. + * + * You may be pointing outside of the input buffer: it is not generally + * safe to dereference this pointer. + */ + simdjson_inline const uint8_t *unsafe_pointer() const noexcept; + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token to retrieve. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ... 
+ */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the current root token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; + /** + * Get the JSON text for the last token in the document. + * + * This is not null-terminated; it is a view into the JSON. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ... + */ + simdjson_inline const uint8_t *peek_last() const noexcept; + + /** + * Ascend one level. + * + * Validates that the depth - 1 == parent_depth. + * + * @param parent_depth the expected parent depth. + */ + simdjson_inline void ascend_to(depth_t parent_depth) noexcept; + + /** + * Descend one level. + * + * Validates that the new depth == child_depth. + * + * @param child_depth the expected child depth. + */ + simdjson_inline void descend_to(depth_t child_depth) noexcept; + simdjson_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; + + /** + * Get current depth. + */ + simdjson_inline depth_t depth() const noexcept; + + /** + * Get current (writeable) location in the string buffer. + */ + simdjson_inline uint8_t *&string_buf_loc() noexcept; + + /** + * Report an unrecoverable error, preventing further iteration. + * + * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. 
+ */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + /** + * Log error, but don't stop iteration. + * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. + */ + simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; + + /** + * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with + * N bytes of capacity. It will return false if N is too small (smaller than max_len) of if it is zero. + * The buffer (tmpbuf) is padded with space characters. + */ + simdjson_warn_unused simdjson_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept; + + simdjson_inline token_position position() const noexcept; + /** + * Write the raw_json_string to the string buffer and return a string_view. + * Each raw_json_string should be unescaped once, or else the string buffer might + * overflow. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, bool allow_replacement) noexcept; + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in) noexcept; + + simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; + + simdjson_inline error_code consume_character(char c) noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + simdjson_inline token_position start_position(depth_t depth) const noexcept; + simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; +#endif + + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() const noexcept; + + /** + * Updates this json iterator so that it is back at the beginning of the document, + * as if it had just been created. 
+ */ + inline void rewind() noexcept; + /** + * This checks whether the {,},[,] are balanced so that the document + * ends with proper zero depth. This requires scanning the whole document + * and it may be expensive. It is expected that it will be rarely called. + * It does not attempt to match { with } and [ with ]. + */ + inline bool balanced() const noexcept; +protected: + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser, bool streaming) noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + /// The last token before the end + simdjson_inline token_position last_position() const noexcept; + /// The token *at* the end. This points at gibberish and should only be used for comparison. + simdjson_inline token_position end_position() const noexcept; + /// The end of the buffer. + simdjson_inline token_position end() const noexcept; + + friend class document; + friend class document_stream; + friend class object; + friend class array; + friend class value; + friend class raw_json_string; + friend class parser; + friend class value_iterator; + friend class field; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... 
args) noexcept; +}; // json_iterator + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(westmere::ondemand::json_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H +/* end file simdjson/generic/ondemand/json_iterator.h for westmere */ +/* including simdjson/generic/ondemand/json_type.h for westmere: #include "simdjson/generic/ondemand/json_type.h" */ +/* begin file simdjson/generic/ondemand/json_type.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +/** + * The type of a JSON value. + */ +enum class json_type { + // Start at 1 to catch uninitialized / default values more easily + array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) + object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) + number, ///< A JSON number ( 1 or -2.3 or 4.5e6 ...) + string, ///< A JSON string ( "a" or "hello world\n" ...) + boolean, ///< A JSON boolean (true or false) + null ///< A JSON null (null) +}; + +/** + * A type representing a JSON number. 
+ * The design of the struct is deliberately straight-forward. All + * functions return standard values with no error check. + */ +struct number { + + /** + * return the automatically determined type of + * the number: number_type::floating_point_number, + * number_type::signed_integer or number_type::unsigned_integer. + * + * enum class number_type { + * floating_point_number=1, /// a binary64 number + * signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + * unsigned_integer /// a positive integer larger or equal to 1<<63 + * }; + */ + simdjson_inline ondemand::number_type get_number_type() const noexcept; + /** + * return true if the automatically determined type of + * the number is number_type::unsigned_integer. + */ + simdjson_inline bool is_uint64() const noexcept; + /** + * return the value as a uint64_t, only valid if is_uint64() is true. + */ + simdjson_inline uint64_t get_uint64() const noexcept; + simdjson_inline operator uint64_t() const noexcept; + + /** + * return true if the automatically determined type of + * the number is number_type::signed_integer. + */ + simdjson_inline bool is_int64() const noexcept; + /** + * return the value as a int64_t, only valid if is_int64() is true. + */ + simdjson_inline int64_t get_int64() const noexcept; + simdjson_inline operator int64_t() const noexcept; + + + /** + * return true if the automatically determined type of + * the number is number_type::floating_point_number. + */ + simdjson_inline bool is_double() const noexcept; + /** + * return the value as a double, only valid if is_double() is true. + */ + simdjson_inline double get_double() const noexcept; + simdjson_inline operator double() const noexcept; + + /** + * Convert the number to a double. Though it always succeed, the conversion + * may be lossy if the number cannot be represented exactly. 
+ */ + simdjson_inline double as_double() const noexcept; + + +protected: + /** + * The next block of declaration is designed so that we can call the number parsing + * functions on a number type. They are protected and should never be used outside + * of the core simdjson library. + */ + friend class value_iterator; + template + friend error_code numberparsing::write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer); + template + friend error_code numberparsing::parse_number(const uint8_t *const src, W &writer); + /** Store a signed 64-bit value to the number. */ + simdjson_inline void append_s64(int64_t value) noexcept; + /** Store an unsigned 64-bit value to the number. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + /** Store a double value to the number. */ + simdjson_inline void append_double(double value) noexcept; + /** Specifies that the value is a double, but leave it undefined. */ + simdjson_inline void skip_double() noexcept; + /** + * End of friend declarations. + */ + + /** + * Our attributes are a union type (size = 64 bits) + * followed by a type indicator. + */ + union { + double floating_point_number; + int64_t signed_integer; + uint64_t unsigned_integer; + } payload{0}; + number_type type{number_type::signed_integer}; +}; + +/** + * Write the JSON type to the output stream + * + * @param out The output stream. + * @param type The json_type. + */ +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept; + +#if SIMDJSON_EXCEPTIONS +/** + * Send JSON type to an output stream. + * + * @param out The output stream. + * @param type The json_type. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown). 
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false); +#endif + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(westmere::ondemand::json_type &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H +/* end file simdjson/generic/ondemand/json_type.h for westmere */ +/* including simdjson/generic/ondemand/raw_json_string.h for westmere: #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +/** + * A string escaped per JSON rules, terminated with quote ("). They are used to represent + * unescaped keys inside JSON documents. + * + * (In other words, a pointer to the beginning of a string, just after the start quote, inside a + * JSON file.) + * + * This class is deliberately simplistic and has little functionality. You can + * compare a raw_json_string instance with an unescaped C string, but + * that is nearly all you can do. 
+ * + * The raw_json_string is unescaped. If you wish to write an unescaped version of it to your own + * buffer, you may do so using the parser.unescape(string, buff) method, using an ondemand::parser + * instance. Doing so requires you to have a sufficiently large buffer. + * + * The raw_json_string instances originate typically from field instance which in turn represent + * key-value pairs from object instances. From a field instance, you get the raw_json_string + * instance by calling key(). You can, if you want a more usable string_view instance, call + * the unescaped_key() method on the field instance. You may also create a raw_json_string from + * any other string value, with the value.get_raw_json_string() method. Again, you can get + * a more usable string_view instance by calling get_string(). + * + */ +class raw_json_string { +public: + /** + * Create a new invalid raw_json_string. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline raw_json_string() noexcept = default; + + /** + * Create a new invalid raw_json_string pointed at the given location in the JSON. + * + * The given location must be just *after* the beginning quote (") in the JSON file. + * + * It *must* be terminated by a ", and be a valid JSON string. + */ + simdjson_inline raw_json_string(const uint8_t * _buf) noexcept; + /** + * Get the raw pointer to the beginning of the string in the JSON (just after the "). + * + * It is possible for this function to return a null pointer if the instance + * has outlived its existence. + */ + simdjson_inline const char * raw() const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done) on target.size() characters, + * and if the raw_json_string instance has a quote character at byte index target.size(). + * We never read more than length + 1 bytes in the raw_json_string instance. 
+ * If length is smaller than target.size(), this will return false. + * + * The std::string_view instance may contain any characters. However, the caller + * is responsible for setting length so that length bytes may be read in the + * raw_json_string. + * + * Performance: the comparison may be done using memcmp which may be efficient + * for long strings. + */ + simdjson_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The std::string_view instance should not contain unescaped quote characters: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * Performance: the comparison is done byte-by-byte which might be inefficient for + * long strings. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... + * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The provided C string should not contain an unescaped quote character: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... 
+ * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(const char* target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(const char* target) const noexcept; + + /** + * Returns true if target is free from unescaped quote. If target is known at + * compile-time, we might expect the computation to happen at compile time with + * many compilers (not all!). + */ + static simdjson_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; + static simdjson_inline bool is_free_from_unescaped_quote(const char* target) noexcept; + +private: + + + /** + * This will set the inner pointer to zero, effectively making + * this instance unusable. + */ + simdjson_inline void consume() noexcept { buf = nullptr; } + + /** + * Checks whether the inner pointer is non-null and thus usable. + */ + simdjson_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result will be a valid UTF-8. + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + * @param allow_replacement Whether we allow replacement of invalid surrogate pairs. 
+ */ + simdjson_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter, bool allow_replacement) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result may not be a valid UTF-8. https://simonsapin.github.io/wtf-8/ + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + */ + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(json_iterator &iter) const noexcept; + const uint8_t * buf{}; + friend class object; + friend class field; + friend class parser; + friend struct simdjson_result; +}; + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; + +/** + * Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible + * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings. 
+ */ +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept; +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept; + + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(westmere::ondemand::raw_json_string &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private + + simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape(westmere::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(westmere::ondemand::json_iterator &iter) const noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H +/* end file simdjson/generic/ondemand/raw_json_string.h for westmere */ +/* including simdjson/generic/ondemand/parser.h for westmere: #include "simdjson/generic/ondemand/parser.h" */ +/* begin file simdjson/generic/ondemand/parser.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace westmere { +namespace ondemand { + +/** + * The default batch size for document_stream instances for this On Demand kernel. + * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * in the future. + */ +static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; +/** + * Some adversary might try to set the batch size to 0 or 1, which might cause problems. + * We set a minimum of 32B since anything else is highly likely to be an error. In practice, + * most users will want a much larger batch size. + * + * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON + * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. + */ +static constexpr size_t MINIMAL_BATCH_SIZE = 32; + +/** + * A JSON fragment iterator. + * + * This holds the actual iterator as well as the buffer for writing strings. + */ +class parser { +public: + /** + * Create a JSON parser. + * + * The new parser will have zero capacity. + */ + inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; + + inline parser(parser &&other) noexcept = default; + simdjson_inline parser(const parser &other) = delete; + simdjson_inline parser &operator=(const parser &other) = delete; + simdjson_inline parser &operator=(parser &&other) noexcept = default; + + /** Deallocate the JSON parser. */ + inline ~parser() noexcept = default; + + /** + * Start iterating an on-demand JSON document. + * + * ondemand::parser parser; + * document doc = parser.iterate(json); + * + * It is expected that the content is a valid UTF-8 file, containing a valid JSON document. + * Otherwise the iterate method may return an error. 
In particular, the whole input should be + * valid: we do not attempt to tolerate incorrect content either before or after a JSON + * document. If there is a UTF-8 BOM, the parser skips it. + * + * ### IMPORTANT: Validate what you use + * + * Calling iterate on an invalid JSON document may not immediately trigger an error. The call to + * iterate does not parse and validate the whole document. + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call parse() again or destroy the parser. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you are + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * @param len The length of the JSON. + * @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). + * + * @return The document, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document. 
+ */ + simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_result iterate_allow_incomplete_json(padded_string_view json) & noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const uint8_t *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(padded_string &&json) & noexcept = delete; + + /** + * @private + * + * Start iterating an on-demand JSON document. 
+ * + * ondemand::parser parser; + * json_iterator doc = parser.iterate(json); + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call parse() again or destroy the parser. + * + * The ondemand::document instance holds the iterator. The document must remain in scope + * while you are accessing instances of ondemand::value, ondemand::object, ondemand::array. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * + * @return The iterator, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document. + */ + simdjson_warn_unused simdjson_result iterate_raw(padded_string_view json) & noexcept; + + + /** + * Parse a buffer containing many JSON documents. 
+ * + * auto json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"_padded; + * ondemand::parser parser; + * ondemand::document_stream docs = parser.iterate_many(json); + * for (auto & doc : docs) { + * std::cout << doc["foo"] << std::endl; + * } + * // Prints 1 2 3 + * + * No copy of the input buffer is made. + * + * The function is lazy: it may be that no more than one JSON document at a time is parsed. + * + * The caller is responsible for ensuring that the input string data remains unchanged and is + * not deleted during the loop. + * + * ### Format + * + * The buffer must contain a series of one or more JSON documents, concatenated into a single + * buffer, separated by ASCII whitespace. It effectively parses until it has a fully valid document, + * then starts parsing the next document at that point. (It does this with more parallelism and + * lookahead than you might think, though.) + * + * Documents that consist of an object or array may omit the whitespace between them, concatenating + * with no separator. Documents that consist of a single primitive (i.e. documents that are not + * arrays or objects) MUST be separated with ASCII whitespace. + * + * The characters inside a JSON document, and between JSON documents, must be valid Unicode (UTF-8). + * If there is a UTF-8 BOM, the parser skips it. + * + * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. + * Setting batch_size to excessively large or excessively small values may negatively impact + * performance. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you are + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. 
+ * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * + * ### Parser Capacity + * + * If the parser's current capacity is less than batch_size, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param buf The concatenated JSON to parse. + * @param len The length of the concatenated JSON. + * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet + * spot is cache-related: small enough to fit in cache, yet big enough to + * parse as many documents as possible in one tight loop. + * Defaults to DEFAULT_BATCH_SIZE, which has been a reasonable sweet spot in our tests. + * @param allow_comma_separated (defaults to false) This allows a mode where the documents are + * separated by commas instead of whitespace. It comes with a performance + * penalty because the entire document is indexed at once (and the document must be + * less than 4 GB), and there is no multithreading. In this mode, the batch_size parameter + * is effectively ignored, as it is set to at least the document size. + * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: + * - MEMALLOC if the parser does not have enough capacity and memory allocation fails + * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. + * - other json errors if parsing fails. You should not rely on these errors to always be the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
+ */ + inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + + /** @private We do not want to allow implicit conversion from C string to std::string. */ + simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; + + /** The capacity of this parser (the largest document it can process). */ + simdjson_inline size_t capacity() const noexcept; + /** The maximum capacity of this parser (the largest document it is allowed to process). */ + simdjson_inline size_t max_capacity() const noexcept; + simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; + /** + * The maximum depth of this parser (the most deeply nested objects and arrays it can process). + * This parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. 
+ */ + simdjson_inline size_t max_depth() const noexcept; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * The max_depth parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return The error, if there is one. + */ + simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept; + + #ifdef SIMDJSON_THREADS_ENABLED + /** + * The parser instance can use threads when they are available to speed up some + * operations. It is enabled by default. Changing this attribute will change the + * behavior of the parser for future operations. + */ + bool threaded{true}; + #endif + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result must be valid UTF-8. + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. 
+ * @param allow_replacement Whether we allow a replacement if the input string contains unmatched surrogate pairs. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement = false) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result may not be valid UTF-8. See https://simonsapin.github.io/wtf-8/ + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. 
+ */ + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; + +private: + /** @private [for benchmarking access] The implementation to use */ + std::unique_ptr implementation{}; + size_t _capacity{0}; + size_t _max_capacity; + size_t _max_depth{DEFAULT_MAX_DEPTH}; + std::unique_ptr string_buf{}; +#if SIMDJSON_DEVELOPMENT_CHECKS + std::unique_ptr start_positions{}; +#endif + + friend class json_iterator; + friend class document_stream; +}; + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(westmere::ondemand::parser &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H +/* end file simdjson/generic/ondemand/parser.h for westmere */ + +// All other declarations +/* including simdjson/generic/ondemand/array.h for westmere: #include "simdjson/generic/ondemand/array.h" */ +/* begin file simdjson/generic/ondemand/array.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +/** + * A forward-only JSON array. 
+ */ +class array { +public: + /** + * Create a new invalid array. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline array() noexcept = default; + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an array is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the beginning of the array and checks whether the + * array is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result is_empty() & noexcept; + /** + * Reset the iterator so that we are pointing back at the + * beginning of the array. You should still consume values only once even if you + * can iterate through the array more than once. If you unescape a string + * within the array more than once, you have unsafe code. 
Note that rewinding + * an array means that you may need to reparse it anew: it is not a free + * operation. + * + * @returns true if the array contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/0/foo/a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an array + * instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. 
We only support + * JSONPath queries that are trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Consumes the array and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + + /** + * Get the value at the given index. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; +protected: + /** + * Go to the end of the array, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + + /** + * Begin array iteration. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + */ + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + /** + * Begin array iteration from the root. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + * @error TAPE_ERROR if there is no closing ] at the end of the document.
+ */ + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + /** + * Begin array iteration. + * + * This version of the method should be called after the initial [ has been verified, and is + * intended for use by switch statements that check the type of a value. + * + * @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array. + */ + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + + /** + * Internal array creation: creates an array from the given iterator. Call array::start() or array::started() instead of this. + * + * @param iter The iterator. Must either be at the start of the first element with iter.is_alive() + * == true, or past the [] with is_alive() == false if the array is empty. Will be *moved* + * into the resulting array. + */ + simdjson_inline array(const value_iterator &iter) noexcept; + + /** + * Iterator marking current position. + * + * iter.is_alive() == false indicates iteration is complete.
+ */ + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; + friend struct simdjson_result; + friend class array_iterator; +}; + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(westmere::ondemand::array &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + inline simdjson_result count_elements() & noexcept; + inline simdjson_result is_empty() & noexcept; + inline simdjson_result reset() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_H +/* end file simdjson/generic/ondemand/array.h for westmere */ +/* including simdjson/generic/ondemand/array_iterator.h for westmere: #include "simdjson/generic/ondemand/array_iterator.h" */ +/* begin file simdjson/generic/ondemand/array_iterator.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace westmere { +namespace ondemand { + +/** + * A forward-only JSON array. + * + * This is an input_iterator, meaning: + * - It is forward-only + * - * must be called exactly once per element. + * - ++ must be called exactly once in between each * (*, ++, *, ++, * ...) + */ +class array_iterator { +public: + /** Create a new, invalid array iterator. */ + simdjson_inline array_iterator() noexcept = default; + + // + // Iterator interface + // + + /** + * Get the current element. + * + * Part of the std::iterator interface. + */ + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + /** + * Check if we are at the end of the JSON. + * + * Part of the std::iterator interface. + * + * @return true if there are no more elements in the JSON array. + */ + simdjson_inline bool operator==(const array_iterator &) const noexcept; + /** + * Check if there are more elements in the JSON array. + * + * Part of the std::iterator interface. + * + * @return true if there are more elements in the JSON array. + */ + simdjson_inline bool operator!=(const array_iterator &) const noexcept; + /** + * Move to the next element. + * + * Part of the std::iterator interface. 
+ */ + simdjson_inline array_iterator &operator++() noexcept; + +private: + value_iterator iter{}; + + simdjson_inline array_iterator(const value_iterator &iter) noexcept; + + friend class array; + friend class value; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(westmere::ondemand::array_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H +/* end file simdjson/generic/ondemand/array_iterator.h for westmere */ +/* including simdjson/generic/ondemand/document.h for westmere: #include "simdjson/generic/ondemand/document.h" */ +/* begin file simdjson/generic/ondemand/document.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +/** + * A JSON document. It holds a json_iterator instance. 
+ * + * Used by tokens to get text, and string buffer location. + * + * You must keep the document around during iteration. + */ +class document { +public: + /** + * Create a new invalid document. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline document() noexcept = default; + simdjson_inline document(const document &other) noexcept = delete; // pass your documents by reference, not by copy + simdjson_inline document(document &&other) noexcept = default; + simdjson_inline document &operator=(const document &other) noexcept = delete; + simdjson_inline document &operator=(document &&other) noexcept = default; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() & noexcept; + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() & noexcept; + /** + * Cast this JSON value to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + /** + * Cast this JSON value (inside string) to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64() noexcept; + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer.
+ * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: Calling get_string() twice on the same document is an error. + * + * @param Whether to allow a replacement character for unmatched surrogate pairs. + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + /** + * Attempts to fill the provided std::string reference with the parsed value of the current string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. + * We recommend you avoid allocating an std::string unless you need to. + * + * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. + */ + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + /** + * Cast this JSON value to a string. + * + * The string is not guaranteed to be valid UTF-8. 
See https://simonsapin.github.io/wtf-8/ + * + * Important: Calling get_wobbly_string() twice on the same document is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_inline simdjson_result get_bool() noexcept; + /** + * Cast this JSON value to a value when the document is an object or an array. + * + * You must not have begun iterating through the object or array. When + * SIMDJSON_DEVELOPMENT_CHECKS is set to 1 (which is the case when building in Debug mode + * by default), and you have already begun iterating, + * you will get an OUT_OF_ORDER_ITERATION error. If you have begun iterating, you can use + * rewind() to reset the document to its initial state before calling this method. + * + * @returns A value if the document is a JSON array or object. + * @returns SCALAR_DOCUMENT_AS_VALUE error if the document is a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result get_value() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null.
+ * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_inline simdjson_result get() & noexcept { + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + } + /** @overload template simdjson_result get() & noexcept */ + template simdjson_inline simdjson_result get() && noexcept { + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." 
+ " You may also add support for custom types, see our documentation."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value + * + * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_inline error_code get(T &out) & noexcept; + /** @overload template error_code get(T &out) & noexcept */ + template simdjson_inline error_code get(T &out) && noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.) + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() noexcept(false); + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() & noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() & noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A signed 64-bit integer. 
+ * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); + /** + * Cast this JSON value to a value when the document is an object or an array. + * + * You must not have begun iterating through the object or array. When + * SIMDJSON_DEVELOPMENT_CHECKS is defined, and you have already begun iterating, + * you will get an OUT_OF_ORDER_ITERATION error. 
If you have begun iterating, you can use + * rewind() to reset the document to its initial state before calling this method. + * + * @returns A value if the document is a JSON array or object. + * @exception SCALAR_DOCUMENT_AS_VALUE error if the document is a scalar (see is_scalar() function). + */ + simdjson_inline operator value() noexcept(false); +#endif + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. 
+ * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) & noexcept; + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to + * a key a single time. Doing object["mykey"].to_string()and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
+ */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field was not there when they are not in order). + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. 
+ * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the document is a scalar (string, number, null, Boolean). + * Returns false when there it is an array or object. + * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + + /** + * Checks whether the document is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; + + /** + * Checks whether the document is a negative number. 
+ * + * @returns true if the number is negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the document is an integer number. Note that + * this requires partially parsing the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * @returns true if the number is an integer. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808 and no larger than 18446744073709551615. + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808 and greater or equal to -9223372036854775808. + * get_number_type() is number_type::big_integer if we have an integer outside + * of those ranges (either larger than 18446744073709551615 or smaller than -9223372036854775808). + * Otherwise, get_number_type() has value number_type::floating_point_number + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number. 
An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. If this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view may be the padded buffer. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". 
+ * - -1.2e-100 + * - true + * - false + * - null + */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + /** + * Reset the iterator inside the document instance so we are pointing back at the + * beginning of the document, as if it had just been created. It invalidates all + * values, objects and arrays that you have created so far (including unescaped strings). + */ + inline void rewind() noexcept; + /** + * Returns debugging information. + */ + inline std::string to_debug_string() noexcept; + /** + * Some unrecoverable error conditions may render the document instance unusable. + * The is_alive() method returns true when the document is still suitable. + */ + inline bool is_alive() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() const noexcept; + + /** + * Returns true if this document has been fully parsed. + * If you have consumed the whole document and at_end() returns + * false, then there may be trailing content. + */ + inline bool at_end() const noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. 
+ * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/\\u00E9") == 123 + * doc.at_pointer((const char*)u8"/\u00E9") returns an error (NO_SUCH_FIELD) + * + * Note that at_pointer() automatically calls rewind between each call. Thus + * all values, objects and arrays that you have created so far (including unescaped strings) + * are invalidated. After calling at_pointer, you need to consume the result: string values + * should be stored in your own variables, arrays should be decoded and stored in your own array-like + * structures and so forth. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. 
We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_path(".\\u00E9") == 123 + * doc.at_path((const char*)u8".\u00E9") returns an error (NO_SUCH_FIELD) + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Consumes the document and returns a string_view instance corresponding to the + * document as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; +protected: + /** + * Consumes the document. 
+ */ + simdjson_inline error_code consume() noexcept; + + simdjson_inline document(ondemand::json_iterator &&iter) noexcept; + simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; + + simdjson_inline value_iterator resume_value_iterator() noexcept; + simdjson_inline value_iterator get_root_value_iterator() noexcept; + simdjson_inline simdjson_result start_or_resume_object() noexcept; + static simdjson_inline document start(ondemand::json_iterator &&iter) noexcept; + + // + // Fields + // + json_iterator iter{}; ///< Current position in the document + static constexpr depth_t DOCUMENT_DEPTH = 0; ///< document depth is always 0 + + friend class array_iterator; + friend class value; + friend class ondemand::parser; + friend class object; + friend class array; + friend class field; + friend class token; + friend class document_stream; + friend class document_reference; +}; + + +/** + * A document_reference is a thin wrapper around a document reference instance. + */ +class document_reference { +public: + simdjson_inline document_reference() noexcept; + simdjson_inline document_reference(document &d) noexcept; + simdjson_inline document_reference(const document_reference &other) noexcept = default; + simdjson_inline document_reference& operator=(const document_reference &other) noexcept = default; + simdjson_inline void rewind() noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code 
get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + + simdjson_inline simdjson_result is_null() noexcept; + template simdjson_inline simdjson_result get() & noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + simdjson_inline operator document&() const noexcept; +#if SIMDJSON_EXCEPTIONS + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator array() & noexcept(false); + simdjson_inline operator object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + + simdjson_inline simdjson_result type() noexcept; + simdjson_inline 
simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + +private: + document *doc{nullptr}; +}; +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(westmere::ondemand::document &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() 
noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() & noexcept; + template simdjson_inline simdjson_result get() && noexcept; + + template simdjson_inline error_code get(T &out) & noexcept; + template simdjson_inline error_code get(T &out) && noexcept; +#if SIMDJSON_EXCEPTIONS + template ::value == false>::type> + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator westmere::ondemand::array() & noexcept(false); + simdjson_inline operator westmere::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator westmere::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator westmere::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result 
is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool at_end() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + + +} // namespace simdjson + + + +namespace simdjson { + +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(westmere::ondemand::document_reference value, error_code error) noexcept; + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + 
simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; +#if SIMDJSON_EXCEPTIONS + template ::value == false>::type> + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator westmere::ondemand::array() & noexcept(false); + simdjson_inline operator westmere::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator westmere::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator westmere::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + 
simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H +/* end file simdjson/generic/ondemand/document.h for westmere */ +/* including simdjson/generic/ondemand/document_stream.h for westmere: #include "simdjson/generic/ondemand/document_stream.h" */ +/* begin file simdjson/generic/ondemand/document_stream.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#ifdef SIMDJSON_THREADS_ENABLED +#include +#include +#include +#endif + +namespace simdjson { +namespace westmere { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED +/** @private Custom worker class **/ +struct stage1_worker { + stage1_worker() noexcept = default; + stage1_worker(const stage1_worker&) = delete; + stage1_worker(stage1_worker&&) = delete; + stage1_worker operator=(const stage1_worker&) = delete; + ~stage1_worker(); + /** 
+ * We only start the thread when it is needed, not at object construction, this may throw. + * You should only call this once. + **/ + void start_thread(); + /** + * Start a stage 1 job. You should first call 'run', then 'finish'. + * You must call start_thread once before. + */ + void run(document_stream * ds, parser * stage1, size_t next_batch_start); + /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ + void finish(); + +private: + + /** + * Normally, we would never stop the thread. But we do in the destructor. + * This function is only safe assuming that you are not waiting for results. You + * should have called run, then finish, and be done. + **/ + void stop_thread(); + + std::thread thread{}; + /** These three variables define the work done by the thread. **/ + ondemand::parser * stage1_thread_parser{}; + size_t _next_batch_start{}; + document_stream * owner{}; + /** + * We have two state variables. This could be streamlined to one variable in the future but + * we use two for clarity. + */ + bool has_work{false}; + bool can_work{true}; + + /** + * We lock using a mutex. + */ + std::mutex locking_mutex{}; + std::condition_variable cond_var{}; + + friend class document_stream; +}; +#endif // SIMDJSON_THREADS_ENABLED + +/** + * A forward-only stream of documents. + * + * Produced by parser::iterate_many. + * + */ +class document_stream { +public: + /** + * Construct an uninitialized document_stream. + * + * ```c++ + * document_stream docs; + * auto error = parser.iterate_many(json).get(docs); + * ``` + */ + simdjson_inline document_stream() noexcept; + /** Move one document_stream to another. */ + simdjson_inline document_stream(document_stream &&other) noexcept = default; + /** Move one document_stream to another. */ + simdjson_inline document_stream &operator=(document_stream &&other) noexcept = default; + + simdjson_inline ~document_stream() noexcept; + + /** + * Returns the input size in bytes. 
+ */ + inline size_t size_in_bytes() const noexcept; + + /** + * After iterating through the stream, this method + * returns the number of bytes that were not parsed at the end + * of the stream. If truncated_bytes() differs from zero, + * then the input was truncated maybe because incomplete JSON + * documents were found at the end of the stream. You + * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). + * + * You should only call truncated_bytes() after streaming through all + * documents, like so: + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto & doc : stream) { + * // do something with doc + * } + * size_t truncated = stream.truncated_bytes(); + * + */ + inline size_t truncated_bytes() const noexcept; + + class iterator { + public: + using value_type = simdjson_result; + using reference = simdjson_result; + using pointer = void; + using difference_type = std::ptrdiff_t; + using iterator_category = std::input_iterator_tag; + + /** + * Default constructor. + */ + simdjson_inline iterator() noexcept; + /** + * Get the current document (or error). + */ + simdjson_inline reference operator*() noexcept; + /** + * Advance to the next document (prefix). + */ + inline iterator& operator++() noexcept; + /** + * Check if we're at the end yet. + * @param other the end iterator to compare to. + */ + simdjson_inline bool operator!=(const iterator &other) const noexcept; + /** + * @private + * + * Gives the current index in the input document in bytes. + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * auto doc = *i; + * size_t index = i.current_index(); + * } + * + * This function (current_index()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier.
+ */ + simdjson_inline size_t current_index() const noexcept; + + /** + * @private + * + * Gives a view of the current document at the current position. + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * std::string_view v = i.source(); + * } + * + * The returned string_view instance is simply a map to the (unparsed) + * source string: it may thus include white-space characters and all manner + * of padding. + * + * This function (source()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + * + */ + simdjson_inline std::string_view source() const noexcept; + + /** + * Returns error of the stream (if any). + */ + inline error_code error() const noexcept; + + private: + simdjson_inline iterator(document_stream *s, bool finished) noexcept; + /** The document_stream we're iterating through. */ + document_stream* stream; + /** Whether we're finished or not. */ + bool finished; + + friend class document; + friend class document_stream; + friend class json_iterator; + }; + + /** + * Start iterating the documents in the stream. + */ + simdjson_inline iterator begin() noexcept; + /** + * The end of the stream, for iterator comparison purposes. + */ + simdjson_inline iterator end() noexcept; + +private: + + document_stream &operator=(const document_stream &) = delete; // Disallow copying + document_stream(const document_stream &other) = delete; // Disallow copying + + /** + * Construct a document_stream. Does not allocate or parse anything until the iterator is + * used. 
+ * + * @param parser is a reference to the parser instance used to generate this document_stream + * @param buf is the raw byte buffer we need to process + * @param len is the length of the raw byte buffer in bytes + * @param batch_size is the size of the windows (must be greater than or equal to the size of the largest JSON document) + */ + simdjson_inline document_stream( + ondemand::parser &parser, + const uint8_t *buf, + size_t len, + size_t batch_size, + bool allow_comma_separated + ) noexcept; + + /** + * Parse the first document in the buffer. Used by begin(), to handle allocation and + * initialization. + */ + inline void start() noexcept; + + /** + * Parse the next document found in the buffer previously given to document_stream. + * + * The content should be a valid JSON document encoded as UTF-8. If there is a + * UTF-8 BOM, the parser skips it. + * + * You do NOT need to pre-allocate a parser. This function takes care of + * pre-allocating a capacity defined by the batch_size defined when creating the + * document_stream object. + * + * The function returns simdjson::EMPTY if there is no more data to be parsed. + * + * The function returns simdjson::SUCCESS (as integer = 0) in case of success + * and indicates that the buffer has successfully been parsed to the end. + * Every document it contained has been parsed without error. + * + * The function returns an error code from simdjson/simdjson.h in case of failure + * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth + * (the simdjson::error_message function converts these error codes into a string). + * + * You can also check validity by calling parser.is_valid(). The same parser can + * and should be reused for the other documents in the buffer. + */ + inline void next() noexcept; + + /** Move the json_iterator of the document to the location of the next document in the stream. */ + inline void next_document() noexcept; + + /** Get the next document index.
*/ + inline size_t next_batch_start() const noexcept; + + /** Pass the next batch through stage 1 with the given parser. */ + inline error_code run_stage1(ondemand::parser &p, size_t batch_start) noexcept; + + // Fields + ondemand::parser *parser; + const uint8_t *buf; + size_t len; + size_t batch_size; + bool allow_comma_separated; + /** + * We are going to use just one document instance. The document owns + * the json_iterator. It implies that we only ever pass a reference + * to the document to the users. + */ + document doc{}; + /** The error (or lack thereof) from the current document. */ + error_code error; + size_t batch_start{0}; + size_t doc_index{}; + + #ifdef SIMDJSON_THREADS_ENABLED + /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ + bool use_thread; + + inline void load_from_stage1_thread() noexcept; + + /** Start a thread to run stage 1 on the next batch. */ + inline void start_stage1_thread() noexcept; + + /** Wait for the stage 1 thread to finish and capture the results. */ + inline void finish_stage1_thread() noexcept; + + /** The error returned from the stage 1 thread. */ + error_code stage1_thread_error{UNINITIALIZED}; + /** The thread used to run stage 1 against the next batch in the background. */ + std::unique_ptr worker{new(std::nothrow) stage1_worker()}; + /** + * The parser used to run stage 1 in the background. Will be swapped + * with the regular parser when finished. 
+ */ + ondemand::parser stage1_thread_parser{}; + + friend struct stage1_worker; + #endif // SIMDJSON_THREADS_ENABLED + + friend class parser; + friend class document; + friend class json_iterator; + friend struct simdjson_result; + friend struct internal::simdjson_result_base; +}; // document_stream + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(westmere::ondemand::document_stream &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H +/* end file simdjson/generic/ondemand/document_stream.h for westmere */ +/* including simdjson/generic/ondemand/field.h for westmere: #include "simdjson/generic/ondemand/field.h" */ +/* begin file simdjson/generic/ondemand/field.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +/** + * A JSON field (key/value pair) in an object. + * + * Returned from object iteration. + * + * Extends from std::pair so you can use C++ algorithms that rely on pairs. 
+ */ +class field : public std::pair { +public: + /** + * Create a new invalid field. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline field() noexcept; + + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + /** + * Get the key as a raw_json_string. Can be used for direct comparison with + * an unescaped C string: e.g., key() == "test". + */ + simdjson_inline raw_json_string key() const noexcept; + /** + * Get the unprocessed key as a string_view. This includes the quotes and may include + * some spaces after the last quote. + */ + simdjson_inline std::string_view key_raw_json_token() const noexcept; + /** + * Get the key as a string_view. This does not include the quotes and + * the string is unprocessed key so it may contain escape characters + * (e.g., \uXXXX or \n). Use unescaped_key() to get the unescaped key. + */ + simdjson_inline std::string_view escaped_key() const noexcept; + /** + * Get the field value. 
+ */ + simdjson_inline ondemand::value &value() & noexcept; + /** + * @overload ondemand::value &ondemand::value() & noexcept + */ + simdjson_inline ondemand::value value() && noexcept; + +protected: + simdjson_inline field(raw_json_string key, ondemand::value &&value) noexcept; + static simdjson_inline simdjson_result start(value_iterator &parent_iter) noexcept; + static simdjson_inline simdjson_result start(const value_iterator &parent_iter, raw_json_string key) noexcept; + friend struct simdjson_result; + friend class object_iterator; +}; + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(westmere::ondemand::field &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result key() noexcept; + simdjson_inline simdjson_result key_raw_json_token() noexcept; + simdjson_inline simdjson_result escaped_key() noexcept; + simdjson_inline simdjson_result value() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_H +/* end file simdjson/generic/ondemand/field.h for westmere */ +/* including simdjson/generic/ondemand/object.h for westmere: #include "simdjson/generic/ondemand/object.h" */ +/* begin file simdjson/generic/ondemand/object.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +/** + * A forward-only JSON object field iterator. + */ +class object { +public: + /** + * Create a new invalid object. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline object() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. 
+ * + * You are expected to access keys only once. You should access the value corresponding to a + * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. + * + * If you expect to have keys with escape characters, please review our documentation. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field was not there when they are not in order). + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. 
The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. + * + * If you expect to have keys with escape characters, please review our documentation. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. 
+ * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an object + * instance: there is no rewind and no invalidation. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that are trivially convertible to JSON Pointer queries: key + * names and array indices.
+ * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Reset the iterator so that we are pointing back at the + * beginning of the object. You should still consume values only once even if you + * can iterate through the object more than once. If you unescape a string within + * the object more than once, you have unsafe code. Note that rewinding an object + * means that you may need to reparse it anew: it is not a free operation. + * + * @returns true if the object contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * This method scans the beginning of the object and checks whether the + * object is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + inline simdjson_result is_empty() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. 
+ * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Consumes the object and returns a string_view instance corresponding to the + * object as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + +protected: + /** + * Go to the end of the object, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + static simdjson_inline object resume(const value_iterator &iter) noexcept; + simdjson_inline object(const value_iterator &iter) noexcept; + + simdjson_warn_unused simdjson_inline error_code find_field_raw(const std::string_view key) noexcept; + + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(westmere::ondemand::object &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + 
simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + + inline simdjson_result reset() noexcept; + inline simdjson_result is_empty() noexcept; + inline simdjson_result count_fields() & noexcept; + inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_H +/* end file simdjson/generic/ondemand/object.h for westmere */ +/* including simdjson/generic/ondemand/object_iterator.h for westmere: #include "simdjson/generic/ondemand/object_iterator.h" */ +/* begin file simdjson/generic/ondemand/object_iterator.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +class object_iterator { +public: + /** + * Create a new invalid object_iterator. + * + * Exists so you can declare a variable and later assign to it before use. 
+ */ + simdjson_inline object_iterator() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result operator*() noexcept; + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const object_iterator &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. + simdjson_inline bool operator!=(const object_iterator &) const noexcept; + // Checks for '}' and ',' + simdjson_inline object_iterator &operator++() noexcept; + +private: + /** + * The underlying JSON iterator. + * + * PERF NOTE: expected to be elided in favor of the parent document: this is set when the object + * is first used, and never changes afterwards. + */ + value_iterator iter{}; + + simdjson_inline object_iterator(const value_iterator &iter) noexcept; + friend struct simdjson_result; + friend class object; +}; + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(westmere::ondemand::object_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth.
+ simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + // Checks for '}' and ',' + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H +/* end file simdjson/generic/ondemand/object_iterator.h for westmere */ +/* including simdjson/generic/ondemand/serialization.h for westmere: #include "simdjson/generic/ondemand/serialization.h" */ +/* begin file simdjson/generic/ondemand/serialization.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Create a string-view instance out of a document instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(westmere::ondemand::document& x) noexcept; +/** + * Create a string-view instance out of a value instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. The value must + * not have been accessed previously. It does not + * validate the content. + */ +inline simdjson_result to_json_string(westmere::ondemand::value& x) noexcept; +/** + * Create a string-view instance out of an object instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(westmere::ondemand::object& x) noexcept; +/** + * Create a string-view instance out of an array instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again.
It does not + * validate the content. + */ +inline simdjson_result to_json_string(westmere::ondemand::array& x) noexcept; +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +} // namespace simdjson + +/** + * We want to support argument-dependent lookup (ADL). + * Hence we should define operator<< in the namespace + * where the argument (here value, object, etc.) resides. + * Credit: @madhur4127 + * See https://github.com/simdjson/simdjson/issues/1768 + */ +namespace simdjson { namespace westmere { namespace ondemand { + +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param x The value. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::value x); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::array value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The document. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document_reference& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The object. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::object value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +}}} // namespace simdjson::westmere::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H +/* end file simdjson/generic/ondemand/serialization.h for westmere */ + +// Inline definitions +/* including simdjson/generic/ondemand/array-inl.h for westmere: #include "simdjson/generic/ondemand/array-inl.h" */ +/* begin file simdjson/generic/ondemand/array-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped 
(editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the array is first found and the iterator is just past the `{`. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the `,` before the next value (or `]`). In this state, +// depth == iter->depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter->depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the array iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an +// array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter->depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter->depth == depth, and at_start == false. +// +// ## Terminal State +// +// The terminal state has iter->depth < depth. 
at_start is always false. +// +// - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this +// by decrementing depth. In this state, iter->depth < depth, at_start == false, and +// error == SUCCESS. +// + +simdjson_inline array::array(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { + // We don't need to know if the array is empty to start iteration, but we do want to know if there + // is an error--thus `simdjson_unused`. + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_root_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { + bool has_value; + SIMDJSON_TRY(iter.started_array().get(has_value)); + return array(iter); +} + +simdjson_inline simdjson_result array::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return array_iterator(iter); +} +simdjson_inline simdjson_result array::end() noexcept { + return array_iterator(iter); +} +simdjson_inline error_code array::consume() noexcept { + auto error = iter.json_iter().skip_child(iter.depth()-1); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result array::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. 
+ const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline simdjson_result array::count_elements() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the array after counting the number of elements. + iter.reset_array(); + return count; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline simdjson_result array::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_array().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +inline simdjson_result array::reset() & noexcept { + return iter.reset_array(); +} + +inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + // - means "the append position" or "the element after the end of the array" + // We don't support this, because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. 
If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + // Get the child + auto child = at(array_index); + // If there is an error, it ends here + if(child.error()) { + return child; + } + + // If there is a /, we're not done yet, call recursively. + if (i < json_pointer.length()) { + child = child.at_pointer(json_pointer.substr(i)); + } + return child; +} + +inline std::string json_path_to_pointer_conversion(std::string_view json_path) { + if (json_path.empty() || (json_path.front() != '.' && + json_path.front() != '[')) { + return "-1"; // This is just a sentinel value, the caller should check for this and return an error. + } + + std::string result; + // Reserve space to reduce allocations, adjusting for potential increases due + // to escaping. + result.reserve(json_path.size() * 2); + + size_t i = 0; + + while (i < json_path.length()) { + if (json_path[i] == '.') { + result += '/'; + } else if (json_path[i] == '[') { + result += '/'; + ++i; // Move past the '[' + while (i < json_path.length() && json_path[i] != ']') { + if (json_path[i] == '~') { + result += "~0"; + } else if (json_path[i] == '/') { + result += "~1"; + } else { + result += json_path[i]; + } + ++i; + } + if (i == json_path.length() || json_path[i] != ']') { + return "-1"; // Using sentinel value that will be handled as an error by the caller. 
+ } + } else { + if (json_path[i] == '~') { + result += "~0"; + } else if (json_path[i] == '/') { + result += "~1"; + } else { + result += json_path[i]; + } + } + ++i; + } + + return result; +} + +inline simdjson_result array::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { return INVALID_JSON_POINTER; } + return at_pointer(json_pointer); +} + +simdjson_inline simdjson_result array::at(size_t index) noexcept { + size_t i = 0; + for (auto value : *this) { + if (i == index) { return value; } + i++; + } + return INDEX_OUT_OF_BOUNDS; +} + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + westmere::ondemand::array &&value +) noexcept + : implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept + : implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { + if (error()) { return error(); } + return first.is_empty(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view 
json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H +/* end file simdjson/generic/ondemand/array-inl.h for westmere */ +/* including simdjson/generic/ondemand/array_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/array_iterator-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result array_iterator::operator*() noexcept { + if (iter.error()) { iter.abandon(); return iter.error(); } + return value(iter.child()); +} +simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { + return iter.is_open(); +} +simdjson_inline array_iterator &array_iterator::operator++() noexcept { + error_code error; + // PERF NOTE this is a safety 
rail ... users should exit loops as soon as they receive an error, so we'll never get here. + // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. + if (( error = iter.error() )) { return *this; } + if (( error = iter.skip_child() )) { return *this; } + if (( error = iter.has_next_element().error() )) { return *this; } + return *this; +} + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + westmere::ondemand::array_iterator &&value +) noexcept + : westmere::implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : westmere::implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++(first); + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/array_iterator-inl.h for westmere */ +/* including simdjson/generic/ondemand/document-inl.h for westmere: #include "simdjson/generic/ondemand/document-inl.h" */ +/* begin file simdjson/generic/ondemand/document-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H + +/* amalgamation 
skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept + : iter{std::forward(_iter)} +{ + logger::log_start_value(iter, "document"); +} + +simdjson_inline document document::start(json_iterator &&iter) noexcept { + return document(std::forward(iter)); +} + +inline void document::rewind() noexcept { + iter.rewind(); +} + +inline std::string document::to_debug_string() noexcept { + return iter.to_string(); +} + +inline simdjson_result document::current_location() const noexcept { + return 
iter.current_location(); +} + +inline int32_t document::current_depth() const noexcept { + return iter.depth(); +} + +inline bool document::at_end() const noexcept { + return iter.at_end(); +} + + +inline bool document::is_alive() noexcept { + return iter.is_alive(); +} +simdjson_inline value_iterator document::resume_value_iterator() noexcept { + return value_iterator(&iter, 1, iter.root_position()); +} +simdjson_inline value_iterator document::get_root_value_iterator() noexcept { + return resume_value_iterator(); +} +simdjson_inline simdjson_result document::start_or_resume_object() noexcept { + if (iter.at_root()) { + return get_object(); + } else { + return object::resume(resume_value_iterator()); + } +} +simdjson_inline simdjson_result document::get_value() noexcept { + // Make sure we start any arrays or objects before returning, so that start_root_() + // gets called. + + // It is the convention throughout the code that the macro `SIMDJSON_DEVELOPMENT_CHECKS` determines whether + // we check for OUT_OF_ORDER_ITERATION. Proper on::demand code should never trigger this error. +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.at_root()) { return OUT_OF_ORDER_ITERATION; } +#endif + // assert_at_root() serves two purposes: in Debug mode, whether or not + // SIMDJSON_DEVELOPMENT_CHECKS is set or not, it checks that we are at the root of + // the document (this will typically be redundant). In release mode, it generates + // SIMDJSON_ASSUME statements to allow the compiler to make assumptions. + iter.assert_at_root(); + switch (*iter.peek()) { + case '[': { + // The following lines check that the document ends with ]. + auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_array(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + case '{': { + // The following lines would check that the document ends with }. 
+ auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_object(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + default: + // Unfortunately, scalar documents are a special case in simdjson and they cannot + // be safely converted to value instances. + return SCALAR_DOCUMENT_AS_VALUE; + } +} +simdjson_inline simdjson_result document::get_array() & noexcept { + auto value = get_root_value_iterator(); + return array::start_root(value); +} +simdjson_inline simdjson_result document::get_object() & noexcept { + auto value = get_root_value_iterator(); + return object::start_root(value); +} + +/** + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. We want to disallow trailing + * content. + * Thus, in several implementations below, we pass a 'true' parameter value to + * a get_root_value_iterator() method: this indicates that we disallow trailing content. 
+ */ + +simdjson_inline simdjson_result document::get_uint64() noexcept { + return get_root_value_iterator().get_root_uint64(true); +} +simdjson_inline simdjson_result document::get_uint64_in_string() noexcept { + return get_root_value_iterator().get_root_uint64_in_string(true); +} +simdjson_inline simdjson_result document::get_int64() noexcept { + return get_root_value_iterator().get_root_int64(true); +} +simdjson_inline simdjson_result document::get_int64_in_string() noexcept { + return get_root_value_iterator().get_root_int64_in_string(true); +} +simdjson_inline simdjson_result document::get_double() noexcept { + return get_root_value_iterator().get_root_double(true); +} +simdjson_inline simdjson_result document::get_double_in_string() noexcept { + return get_root_value_iterator().get_root_double_in_string(true); +} +simdjson_inline simdjson_result document::get_string(bool allow_replacement) noexcept { + return get_root_value_iterator().get_root_string(true, allow_replacement); +} +template +simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { + return get_root_value_iterator().get_root_string(receiver, true, allow_replacement); +} +simdjson_inline simdjson_result document::get_wobbly_string() noexcept { + return get_root_value_iterator().get_root_wobbly_string(true); +} +simdjson_inline simdjson_result document::get_raw_json_string() noexcept { + return get_root_value_iterator().get_root_raw_json_string(true); +} +simdjson_inline simdjson_result document::get_bool() noexcept { + return get_root_value_iterator().get_root_bool(true); +} +simdjson_inline simdjson_result document::is_null() noexcept { + return get_root_value_iterator().is_root_null(true); +} + +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document::get() & 
noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } + +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } + +template simdjson_inline error_code document::get(T &out) & noexcept { + return get().get(out); +} +template simdjson_inline error_code document::get(T &out) && noexcept { + return std::forward(*this).get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline document::operator T() noexcept(false) { return get(); } +simdjson_inline document::operator array() & noexcept(false) { return get_array(); } +simdjson_inline document::operator object() & noexcept(false) { return get_object(); } +simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } 
+simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document::operator double() noexcept(false) { return get_double(); } +simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document::operator value() noexcept(false) { return get_value(); } + +#endif +simdjson_inline simdjson_result document::count_elements() & noexcept { + auto a = get_array(); + simdjson_result answer = a.count_elements(); + /* If there was an array, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::count_fields() & noexcept { + auto a = get_object(); + simdjson_result answer = a.count_fields(); + /* If there was an object, we are now left pointing at its first element. 
*/ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::at(size_t index) & noexcept { + auto a = get_array(); + return a.at(index); +} +simdjson_inline simdjson_result document::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result document::end() & noexcept { + return {}; +} + +simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field(const char *key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline error_code document::consume() noexcept { + auto error = iter.skip_child(0); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result document::raw_json() noexcept { + auto _iter = get_root_value_iterator(); + const uint8_t * starting_point{_iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. 
+ const uint8_t * final_point{iter.unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result document::type() noexcept { + return get_root_value_iterator().type(); +} + +simdjson_inline simdjson_result document::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline simdjson_result document::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + +simdjson_inline bool document::is_negative() noexcept { + return get_root_value_iterator().is_root_negative(); +} + +simdjson_inline simdjson_result document::is_integer() noexcept { + return get_root_value_iterator().is_root_integer(true); +} + +simdjson_inline simdjson_result document::get_number_type() noexcept { + return get_root_value_iterator().get_root_number_type(true); +} + +simdjson_inline simdjson_result document::get_number() noexcept { + return get_root_value_iterator().get_root_number(true); +} + + +simdjson_inline simdjson_result document::raw_json_token() noexcept { + auto _iter = get_root_value_iterator(); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); +} + +simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_pointer.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result 
document::at_path(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_path.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + westmere::ondemand::document &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base( + error + ) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return 
first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return 
first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first); +} +template<> simdjson_inline error_code simdjson_result::get(westmere::ondemand::document &out) & noexcept = delete; +template<> simdjson_inline error_code 
simdjson_result::get(westmere::ondemand::document &out) && noexcept { + if (error()) { return error(); } + out = std::forward(first); + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} + +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} + +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} + +simdjson_inline bool simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} + +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} + +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} + +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} + + +#if SIMDJSON_EXCEPTIONS +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator westmere::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator westmere::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() 
noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator westmere::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator westmere::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline bool simdjson_result::at_end() const noexcept { + if (error()) { return error(); } + return first.at_end(); +} + + +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} + +} // namespace simdjson + + +namespace simdjson { +namespace westmere { +namespace ondemand { + +simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} +simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} +simdjson_inline void document_reference::rewind() noexcept { 
doc->rewind(); } +simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } +simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } +/** + * The document_reference instances are used primarily/solely for streams of JSON + * documents. + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. + * + * However, for streams of JSON documents, we want to be able to start from + * "321" "321" "321" + * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() + * successfully each time. + * + * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: + * this indicates that we allow trailing content. + */ +simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } +simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } +simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } +template +simdjson_inline error_code document_reference::get_string(string_type& 
receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } +simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } +simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } +simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } +simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } +simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_value(); } +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline document_reference::operator T() noexcept(false) { return get(); } +simdjson_inline document_reference::operator array() & noexcept(false) { return 
array(*doc); } +simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } +simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } +simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } +#endif +simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } +simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } +simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } +simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } +simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } +simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } 
+simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } +simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } +simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } +simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } +simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } +simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } +simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } +simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } +simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } +simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } +simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} +simdjson_inline document_reference::operator document&() const noexcept { return *doc; } + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + + + +namespace simdjson { +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::document_reference value, error_code 
error) + noexcept : implementation_simdjson_result_base(std::forward(value), error) {} + + +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } 
+ return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + 
if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator westmere::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator westmere::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline 
simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator westmere::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator westmere::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H +/* end file simdjson/generic/ondemand/document-inl.h for westmere */ +/* including simdjson/generic/ondemand/document_stream-inl.h for westmere: #include "simdjson/generic/ondemand/document_stream-inl.h" */ +/* begin file simdjson/generic/ondemand/document_stream-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define 
SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace westmere { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. + std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); +} + +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); +} + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). 
+ if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); +} + + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } +} + +inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); +} + +#endif // SIMDJSON_THREADS_ENABLED + +simdjson_inline document_stream::document_stream( + ondemand::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size, + bool _allow_comma_separated +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? 
MINIMAL_BATCH_SIZE : _batch_size}, + allow_comma_separated{_allow_comma_separated}, + error{SUCCESS} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change + #endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif +} + +simdjson_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + allow_comma_separated{false}, + error{UNINITIALIZED} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) + #endif +{ +} + +simdjson_inline document_stream::~document_stream() noexcept +{ + #ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); + #endif +} + +inline size_t document_stream::size_in_bytes() const noexcept { + return len; +} + +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +} + +simdjson_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { +} + +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { +} + +simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { + //if(stream->error) { return stream->error; } + return simdjson_result(stream->doc, stream->error); +} + +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) 
+ // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. + // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->next() is guarded against error conditions + // (it will immediately return if stream->error casts to true). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; +} + +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; +} + +simdjson_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. 
+ return iterator(this, error == EMPTY); +} + +simdjson_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); +} + +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->allocate(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } + doc_index = batch_start; + doc = document(json_iterator(&buf[batch_start], parser)); + doc.iter._streaming = true; + + #ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread on next batch if needed + error = stage1_thread_parser.allocate(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } + #endif // SIMDJSON_THREADS_ENABLED +} + +inline void document_stream::next() noexcept { + // We always exit at once, once in an error condition. + if (error) { return; } + next_document(); + if (error) { return; } + auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); + doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; + + // Check if at end of structural indexes (i.e. 
at end of batch) + if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { + error = EMPTY; + // Load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + #ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } + #else + error = run_stage1(*parser, batch_start); + #endif + /** + * Whenever we move to another window, we need to update all pointers to make + * it appear as if the input buffer started at the beginning of the window. + * + * Take this input: + * + * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] + * + * Say you process the following window... + * + * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' + * + * When you do so, the json_iterator has a pointer at the beginning of the memory region + * (pointing at the beginning of '{"z"...'). + * + * When you move to the window that starts at... + * + * '[7, 10, 9] [15, 11, 12, 13] ... + * + * then it is not sufficient to just run stage 1. You also need to re-anchor the + * json_iterator so that it believes we are starting at '[7, 10, 9]...'. + * + * Under the DOM front-end, this gets done automatically because the parser owns + * the pointer to the data, and when you call stage1 and then stage2 on the same + * parser, then stage2 will run on the pointer acquired by stage1. + * + * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that + * we used. But json_iterator has no callback when stage1 is called on the parser. + * In fact, I think that the parser is unaware of json_iterator. + * + * + * So we need to re-anchor the json_iterator after each call to stage 1 so that + * all of the pointers are in sync. + */ + doc.iter = json_iterator(&buf[batch_start], parser); + doc.iter._streaming = true; + /** + * End of resync. 
+ */ + + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. + doc_index = batch_start; + } + } +} + +inline void document_stream::next_document() noexcept { + // Go to next place where depth=0 (document depth) + error = doc.iter.skip_child(0); + if (error) { return; } + // Always set depth=1 at the start of document + doc.iter._depth = 1; + // consume comma if comma separated is allowed + if (allow_comma_separated) { doc.iter.consume_character(','); } + // Resets the string buffer at the beginning, thus invalidating the strings. + doc.iter._string_buf_loc = parser->string_buf.get(); + doc.iter._root = doc.iter.position(); +} + +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; +} + +inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { + // This code only updates the structural index in the parser, it does not update any json_iterator + // instance. 
+ size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); + } +} + +simdjson_inline size_t document_stream::iterator::current_index() const noexcept { + return stream->doc_index; +} + +simdjson_inline std::string_view document_stream::iterator::source() const noexcept { + auto depth = stream->doc.iter.depth(); + auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); + + // If at root, process the first token to determine if scalar value + if (stream->doc.iter.at_root()) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': // Depth=1 already at start of document + break; + case '}': case ']': + depth--; + break; + default: // Scalar value document + // TODO: We could remove trailing whitespaces + // This returns a string spanning from start of value to the beginning of the next document (excluded) + { + auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; + // normally the length would be next_index - current_index() - 1, except for the last document + size_t svlen = next_index - current_index(); + const char *start = reinterpret_cast(stream->buf) + current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { + svlen--; + } + return std::string_view(start, svlen); + } + } + cur_struct_index++; + } + + while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': + depth++; + break; + case '}': case ']': + depth--; + break; + } + if (depth == 0) { break; } + 
cur_struct_index++; + } + + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; +} + +inline error_code document_stream::iterator::error() const noexcept { + return stream->error; +} + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(stage1_thread_parser,*parser); + error = stage1_thread_error; + if (error) { return; } + + // If there's anything left, start the stage 1 thread! + if (next_batch_start() < len) { + start_stage1_thread(); + } +} + +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. 
+ this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); + + worker->run(this, & this->stage1_thread_parser, _next_batch_start); +} + +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result::simdjson_result( + westmere::ondemand::document_stream &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} + +} + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H +/* end file simdjson/generic/ondemand/document_stream-inl.h for westmere */ +/* including simdjson/generic/ondemand/field-inl.h for westmere: #include "simdjson/generic/ondemand/field-inl.h" */ +/* begin file simdjson/generic/ondemand/field-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +// clang 6 does not think the default constructor can be noexcept, so we make it explicit +simdjson_inline field::field() noexcept : std::pair() {} + +simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept + : 
std::pair(key, std::forward(value)) +{ +} + +simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { + raw_json_string key; + SIMDJSON_TRY( parent_iter.field_key().get(key) ); + SIMDJSON_TRY( parent_iter.field_value() ); + return field::start(parent_iter, key); +} + +simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { + return field(key, parent_iter.child()); +} + +simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); + first.consume(); + return answer; +} + +simdjson_inline raw_json_string field::key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + return first; +} + + +simdjson_inline std::string_view field::key_raw_json_token() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + return std::string_view(reinterpret_cast(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1); +} + +simdjson_inline std::string_view field::escaped_key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us.
+ auto end_quote = second.iter._json_iter->token.peek(-1); + while(*end_quote != '"') end_quote--; + return std::string_view(reinterpret_cast(first.buf), end_quote - first.buf); +} + +simdjson_inline value &field::value() & noexcept { + return second; +} + +simdjson_inline value field::value() && noexcept { + return std::forward(*this).second; +} + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + westmere::ondemand::field &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::key() noexcept { + if (error()) { return error(); } + return first.key(); +} + +simdjson_inline simdjson_result simdjson_result::key_raw_json_token() noexcept { + if (error()) { return error(); } + return first.key_raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::escaped_key() noexcept { + if (error()) { return error(); } + return first.escaped_key(); +} + +simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(allow_replacement); +} + +simdjson_inline simdjson_result simdjson_result::value() noexcept { + if (error()) { return error(); } + return std::move(first.value()); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +/* end file simdjson/generic/ondemand/field-inl.h for westmere */ +/* including simdjson/generic/ondemand/json_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/json_iterator-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef 
SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept + : token(std::forward(other.token)), + parser{other.parser}, + _string_buf_loc{other._string_buf_loc}, + error{other.error}, + _depth{other._depth}, + _root{other._root}, + _streaming{other._streaming} +{ + other.parser = nullptr; +} +simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { + token = other.token; + parser = other.parser; + _string_buf_loc = other._string_buf_loc; + error = other.error; + _depth = other._depth; + _root = other._root; + _streaming = other._streaming; + other.parser = nullptr; + return *this; +} + +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + 
_streaming{false} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} + +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{streaming} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + +inline void json_iterator::rewind() noexcept { + token.set_position( root_position() ); + logger::log_headers(); // We start again + _string_buf_loc = parser->string_buf.get(); + _depth = 1; +} + +inline bool json_iterator::balanced() const noexcept { + token_iterator ti(token); + int32_t count{0}; + ti.set_position( root_position() ); + while(ti.peek() <= peek_last()) { + switch (*ti.return_current_and_advance()) + { + case '[': case '{': + count++; + break; + case ']': case '}': + count--; + break; + default: + break; + } + } + return count == 0; +} + + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and parent_depth, which is a desired effect. The warning does not show up if the +// skip_child() function is not marked inline). 
+SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { + if (depth() <= parent_depth) { return SUCCESS; } + switch (*return_current_and_advance()) { + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + + // For the first open array/object in a value, we've already incremented depth, so keep it the same + // We never stop at colon, but if we did, it wouldn't affect depth + case '[': case '{': case ':': + logger::log_start_value(*this, "skip"); + break; + // If there is a comma, we have just finished a value in an array/object, and need to get back in + case ',': + logger::log_value(*this, "skip"); + break; + // ] or } means we just finished a value and need to jump out of the array/object + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } +#if SIMDJSON_CHECK_EOF + // If there are no more tokens, the parent is incomplete. + if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + break; + case '"': + if(*peek() == ':') { + // We are at a key!!! + // This might happen if you just started an object and you skip it immediately. + // Performance note: it would be nice to get rid of this check as it is somewhat + // expensive. + // https://github.com/simdjson/simdjson/issues/1742 + logger::log_value(*this, "key"); + return_current_and_advance(); // eat up the ':' + break; // important!!! 
+ } + simdjson_fallthrough; + // Anything else must be a scalar value + default: + // For the first scalar, we will have incremented depth already, so we decrement it here. + logger::log_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + } + + // Now that we've considered the first value, we only increment/decrement for arrays/objects + while (position() < end_position()) { + switch (*return_current_and_advance()) { + case '[': case '{': + logger::log_start_value(*this, "skip"); + _depth++; + break; + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + default: + logger::log_value(*this, "skip", ""); + break; + } + } + + return report_error(TAPE_ERROR, "not enough close braces"); +} + +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool json_iterator::at_root() const noexcept { + return position() == root_position(); +} + +simdjson_inline bool json_iterator::is_single_token() const noexcept { + return parser->implementation->n_structural_indexes == 1; +} + +simdjson_inline bool json_iterator::streaming() const noexcept { + return _streaming; +} + +simdjson_inline token_position json_iterator::root_position() const noexcept { + return _root; +} + +simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +} + +simdjson_inline void json_iterator::assert_at_root() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + // Under Visual Studio, the next 
SIMDJSON_ASSUME fails with: the argument + // has side effects that will be discarded. + SIMDJSON_ASSUME( token.position() == _root ); +#endif +} + +simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { + assert_valid_position(token._position + required_tokens - 1); +} + +simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); + SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#endif +} + +simdjson_inline bool json_iterator::at_end() const noexcept { + return position() == end_position(); +} +simdjson_inline token_position json_iterator::end_position() const noexcept { + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + return &parser->implementation->structural_indexes[n_structural_indexes]; +} + +inline std::string json_iterator::to_string() const noexcept { + if( !is_alive() ) { return "dead json_iterator instance"; } + const char * current_structural = reinterpret_cast(token.peek()); + return std::string("json_iterator [ depth : ") + std::to_string(_depth) + + std::string(", structural : '") + std::string(current_structural,1) + + std::string("', offset : ") + std::to_string(token.current_offset()) + + std::string("', error : ") + error_message(error) + + std::string(" ]"); +} + +inline simdjson_result json_iterator::current_location() const noexcept { + if (!is_alive()) { // Unrecoverable error + if (!at_root()) { + return reinterpret_cast(token.peek(-1)); + } else { + return reinterpret_cast(token.peek()); + } + } + if (at_end()) { + return OUT_OF_BOUNDS; + } + return reinterpret_cast(token.peek()); +} + +simdjson_inline bool json_iterator::is_alive() const noexcept { + return parser; +} + +simdjson_inline void json_iterator::abandon() noexcept { + parser 
= nullptr; + _depth = 0; +} + +simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif // SIMDJSON_CHECK_EOF + return token.return_current_and_advance(); +} + +simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { + // deliberately done without safety guard: + return token.peek(); +} + +simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // SIMDJSON_CHECK_EOF + return token.peek(delta); +} + +simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // #if SIMDJSON_CHECK_EOF + return token.peek_length(delta); +} + +simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { + // todo: currently we require end-of-string buffering, but the following + // assert_valid_position should be turned on if/when we lift that condition. + // assert_valid_position(position); + // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. + return token.peek(position); +} + +simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_length(position); +} +simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_root_length(position); +} + +simdjson_inline token_position json_iterator::last_position() const noexcept { + // The following line fails under some compilers... 
+ // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); + // since it has side-effects. + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + SIMDJSON_ASSUME(n_structural_indexes > 0); + return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +} +simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { + return token.peek(last_position()); +} + +simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { + SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); + SIMDJSON_ASSUME(_depth == parent_depth + 1); + _depth = parent_depth; +} + +simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); + _depth = child_depth; +} + +simdjson_inline depth_t json_iterator::depth() const noexcept { + return _depth; +} + +simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { + return _string_buf_loc; +} + +simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); + logger::log_error(*this, message); + error = _error; + return error; +} + +simdjson_inline token_position json_iterator::position() const noexcept { + return token.position(); +} + +simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { + return parser->unescape(in, _string_buf_loc, allow_replacement); +} + +simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { + return parser->unescape_wobbly(in, _string_buf_loc); +} + +simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + 
SIMDJSON_ASSUME(_depth == child_depth - 1); +#if SIMDJSON_DEVELOPMENT_CHECKS +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); + SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); +#endif +#endif + token.set_position(position); + _depth = child_depth; +} + +simdjson_inline error_code json_iterator::consume_character(char c) noexcept { + if (*peek() == c) { + return_current_and_advance(); + return SUCCESS; + } + return TAPE_ERROR; +} + +#if SIMDJSON_DEVELOPMENT_CHECKS + +simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; +} + +simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } +} + +#endif + + +simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); + logger::log_error(*this, message); + return _error; +} + + +simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { + // This function is not expected to be called in performance-sensitive settings. + // Let us guard against silly cases: + if((N < max_len) || (N == 0)) { return false; } + // Copy to the buffer. + std::memcpy(tmpbuf, json, max_len); + if(N > max_len) { // We pad whatever remains with ' '. 
+ std::memset(tmpbuf + max_len, ' ', N - max_len); + } + return true; +} + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/json_iterator-inl.h for westmere */ +/* including simdjson/generic/ondemand/json_type-inl.h for westmere: #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* begin file simdjson/generic/ondemand/json_type-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { + switch (type) { + case json_type::array: out << "array"; break; + case json_type::object: out << "object"; break; + case json_type::number: out << "number"; break; + case json_type::string: out << "string"; break; + case json_type::boolean: out << "boolean"; break; + case json_type::null: out << "null"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& 
operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { + return out << type.value(); +} +#endif + + + +simdjson_inline number_type number::get_number_type() const noexcept { + return type; +} + +simdjson_inline bool number::is_uint64() const noexcept { + return get_number_type() == number_type::unsigned_integer; +} + +simdjson_inline uint64_t number::get_uint64() const noexcept { + return payload.unsigned_integer; +} + +simdjson_inline number::operator uint64_t() const noexcept { + return get_uint64(); +} + +simdjson_inline bool number::is_int64() const noexcept { + return get_number_type() == number_type::signed_integer; +} + +simdjson_inline int64_t number::get_int64() const noexcept { + return payload.signed_integer; +} + +simdjson_inline number::operator int64_t() const noexcept { + return get_int64(); +} + +simdjson_inline bool number::is_double() const noexcept { + return get_number_type() == number_type::floating_point_number; +} + +simdjson_inline double number::get_double() const noexcept { + return payload.floating_point_number; +} + +simdjson_inline number::operator double() const noexcept { + return get_double(); +} + +simdjson_inline double number::as_double() const noexcept { + if(is_double()) { + return payload.floating_point_number; + } + if(is_int64()) { + return double(payload.signed_integer); + } + return double(payload.unsigned_integer); +} + +simdjson_inline void number::append_s64(int64_t value) noexcept { + payload.signed_integer = value; + type = number_type::signed_integer; +} + +simdjson_inline void number::append_u64(uint64_t value) noexcept { + payload.unsigned_integer = value; + type = number_type::unsigned_integer; +} + +simdjson_inline void number::append_double(double value) noexcept { + payload.floating_point_number = value; + type = number_type::floating_point_number; +} + +simdjson_inline void number::skip_double() noexcept { + type = number_type::floating_point_number; +} + +} // namespace ondemand +} // namespace 
westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +/* end file simdjson/generic/ondemand/json_type-inl.h for westmere */ +/* including simdjson/generic/ondemand/logger-inl.h for westmere: #include "simdjson/generic/ondemand/logger-inl.h" */ +/* begin file simdjson/generic/ondemand/logger-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace westmere { +namespace ondemand { +namespace logger { + +static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +static constexpr const int LOG_EVENT_LEN = 20; +static constexpr const int LOG_BUFFER_LEN = 30; +static constexpr const int LOG_SMALL_BUFFER_LEN = 10; +static int log_depth = 0; // Not threadsafe. Log only. 
+ +// Helper to turn unprintable or newline characters into spaces +static inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } +} + +template +static inline std::string string_format(const std::string& format, const Args&... args) +{ + SIMDJSON_PUSH_DISABLE_ALL_WARNINGS + int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; + auto size = static_cast(size_s); + if (size <= 0) return std::string(); + std::unique_ptr buf(new char[size]); + std::snprintf(buf.get(), size, format.c_str(), args...); + SIMDJSON_POP_DISABLE_WARNINGS + return std::string(buf.get(), buf.get() + size - 1); +} + +static inline log_level get_log_level_from_env() +{ + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + char *lvl = getenv("SIMDJSON_LOG_LEVEL"); + SIMDJSON_POP_DISABLE_WARNINGS + if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } + return log_level::info; +} + +static inline log_level log_threshold() +{ + static log_level threshold = get_log_level_from_env(); + return threshold; +} + +static inline bool should_log(log_level level) +{ + return level >= log_threshold(); +} + +inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +} + +inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "", type, detail, log_level::info); +} +inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +} + +inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, 
std::string_view detail) noexcept { + log_line(iter, index, depth, "+", type, detail, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} +inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_line(iter, "+", type, "", delta, depth_delta, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} + +inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + if (LOG_ENABLED) { log_depth--; } + log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +} + +inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); +} +inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { + log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); +} + +inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_event(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_value(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_start_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_end_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_error(iter.json_iter(), error, detail, delta, depth_delta); +} + +inline void 
log_headers() noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(log_level::info))) { + // Technically a static variable is not thread-safe, but if you are using threads and logging... well... + static bool displayed_hint{false}; + log_depth = 0; + printf("\n"); + if (!displayed_hint) { + // We only print this helpful header once. + printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); + printf("# +array says 'this is where we were when we discovered the start array'\n"); + printf( + "# -array says 'this is where we were when we ended the array'\n"); + printf("# skip says 'this is a structural or value I am skipping'\n"); + printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); + printf("#\n"); + printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); + printf("# in addition to the depth being displayed.\n"); + printf("#\n"); + printf("# Every token in the document has a single depth determined by the tokens before it,\n"); + printf("# and is not affected by what the token actually is.\n"); + printf("#\n"); + printf("# Not all structural elements are presented as tokens in the logs.\n"); + printf("#\n"); + printf("# We never give control to the user within an empty array or an empty object.\n"); + printf("#\n"); + printf("# Inside an array, having a depth greater than the array's depth means that\n"); + printf("# we are pointing inside a value.\n"); + printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); + printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); + displayed_hint = true; + } + printf("\n"); + printf("| %-*s ", LOG_EVENT_LEN, "Event"); + printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); + printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); + // printf("| %-*s ", 5, "Next#"); + printf("| %-*s ", 5, "Depth"); + printf("| Detail "); + printf("|\n"); + + printf("|%.*s", 
LOG_EVENT_LEN + 2, DASHES); + printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); + printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); + // printf("|%.*s", 5+2, DASHES); + printf("|%.*s", 5 + 2, DASHES); + printf("|--------"); + printf("|\n"); + fflush(stdout); + } + } +} + +template +inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { + log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); +} + +template +inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(level))) { + const int indent = depth * 2; + const auto buf = iter.token.buf; + auto msg = string_format(title, std::forward(args)...); + printf("| %*s%s%-*s ", indent, "", title_prefix, + LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); + { + // Print the current structural. + printf("| "); + // Before we begin, the index might point right before the document. + // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 + if (index < iter._root) { + printf("%*s", LOG_BUFFER_LEN, ""); + } else { + auto current_structural = &buf[*index]; + for (int i = 0; i < LOG_BUFFER_LEN; i++) { + printf("%c", printable_char(current_structural[i])); + } + } + printf(" "); + } + { + // Print the next structural. 
+ printf("| "); + auto next_structural = &buf[*(index + 1)]; + for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { + printf("%c", printable_char(next_structural[i])); + } + printf(" "); + } + // printf("| %5u ", *(index+1)); + printf("| %5i ", depth); + printf("| %6.*s ", int(detail.size()), detail.data()); + printf("|\n"); + fflush(stdout); + } + } +} + +} // namespace logger +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +/* end file simdjson/generic/ondemand/logger-inl.h for westmere */ +/* including simdjson/generic/ondemand/object-inl.h for westmere: #include "simdjson/generic/ondemand/object-inl.h" */ +/* begin file simdjson/generic/ondemand/object-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + 
logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { + return find_field_unordered(key); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { + return std::forward(*this).find_field_unordered(key); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} + +simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_object().error() ); + return object(iter); +} +simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_root_object().error() ); 
+ return object(iter); +} +simdjson_inline error_code object::consume() noexcept { + if(iter.is_at_key()) { + /** + * whenever you are pointing at a key, calling skip_child() is + * unsafe because you will hit a string and you will assume that + * it is string value, and this mistake will lead you to make bad + * depth computation. + */ + /** + * We want to 'consume' the key. We could really + * just do _json_iter->return_current_and_advance(); at this + * point, but, for clarity, we will use the high-level API to + * eat the key. We assume that the compiler optimizes away + * most of the work. + */ + simdjson_unused raw_json_string actual_key; + auto error = iter.field_key().get(actual_key); + if (error) { iter.abandon(); return error; }; + // Let us move to the value while we are at it. + if ((error = iter.field_value())) { iter.abandon(); return error; } + } + auto error_skip = iter.json_iter().skip_child(iter.depth()-1); + if(error_skip) { iter.abandon(); } + return error_skip; +} + +simdjson_inline simdjson_result object::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.started_object().error() ); + return object(iter); +} + +simdjson_inline object object::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline object::object(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result object::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return object_iterator(iter); +} +simdjson_inline simdjson_result object::end() noexcept { + return object_iterator(iter); +} + +inline 
simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = find_field(unescaped); // Take note find_field does not unescape keys when matching + } else { + child = find_field(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; +} + +inline simdjson_result object::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { + return INVALID_JSON_POINTER; + } + return at_pointer(json_pointer); +} + +simdjson_inline simdjson_result object::count_fields() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. 
+ if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the object after counting the number of elements. + iter.reset_object(); + return count; +} + +simdjson_inline simdjson_result object::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_object().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +simdjson_inline simdjson_result object::reset() & noexcept { + return iter.reset_object(); +} + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first)[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return 
error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +inline simdjson_result simdjson_result::reset() noexcept { + if (error()) { return error(); } + return first.reset(); +} + +inline simdjson_result simdjson_result::is_empty() noexcept { + if (error()) { return error(); } + return first.is_empty(); +} + +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +/* end file simdjson/generic/ondemand/object-inl.h for westmere */ +/* including simdjson/generic/ondemand/object_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/object_iterator-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/ondemand/field-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +// +// object_iterator +// + +simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result object_iterator::operator*() noexcept { + error_code error = iter.error(); + if (error) { iter.abandon(); return error; } + auto result = field::start(iter); + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (result.error()) { iter.abandon(); } + return result; +} +simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { + return iter.is_open(); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline object_iterator &object_iterator::operator++() noexcept { + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error + + simdjson_unused error_code error; + if ((error = iter.skip_child() )) { return *this; } + + simdjson_unused bool has_value; + if ((error = iter.has_next_field().get(has_value) )) { return *this; }; + return *this; +} +SIMDJSON_POP_DISABLE_WARNINGS + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter.depth. at_start may vary. 
Error is +// always SUCCESS: +// +// - Start: This is the state when the object is first found and the iterator is just past the {. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the , or } before the next value. In this state, +// depth == iter.depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter.depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the object iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an +// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter.depth == depth, and at_start == false. +// +// Errors that occur while reading a field to give to the user (such as when the key is not a +// string or the field is missing a colon) are yielded immediately. Depth is then decremented, +// moving to the Finished state without transitioning through an Error state at all. +// +// ## Terminal State +// +// The terminal state has iter.depth < depth. at_start is always false. 
+// +// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. +// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. +// + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + westmere::ondemand::object_iterator &&value +) noexcept + : implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +// If we're iterating and there is an error, return the error once. 
+simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +// Checks for '}' and ',' +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++first; + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/object_iterator-inl.h for westmere */ +/* including simdjson/generic/ondemand/parser-inl.h for westmere: #include "simdjson/generic/ondemand/parser-inl.h" */ +/* begin file simdjson/generic/ondemand/parser-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +simdjson_inline
parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity} { +} + +simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + if (new_capacity > max_capacity()) { return CAPACITY; } + if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + + // string_capacity copied from document::allocate + _capacity = 0; + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); +#if SIMDJSON_DEVELOPMENT_CHECKS + start_positions.reset(new (std::nothrow) token_position[new_max_depth]); +#endif + if (implementation) { + SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); + SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + } else { + SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); + } + _capacity = new_capacity; + _max_depth = new_max_depth; + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. 
+ SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return document::start({ reinterpret_cast(json.data()), this }); +} + +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; + } + return document::start({ reinterpret_cast(json.data()), this, true }); +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { + return iterate(padded_string_view(json, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { + if(json.capacity() - json.size() < SIMDJSON_PADDING) { + json.reserve(json.size() + SIMDJSON_PADDING); + } + return iterate(padded_string_view(json)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { + return iterate(padded_string_view(json)); +} + +simdjson_warn_unused simdjson_inline simdjson_result 
parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + padded_string_view json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + const padded_string &json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length()) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return json_iterator(reinterpret_cast(json.data()), this); +} + +inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { + buf += 3; + len -= 3; + } + if(allow_comma_separated && batch_size < len) { batch_size = len; } + return document_stream(*this, buf, len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return 
iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} + +simdjson_inline size_t parser::capacity() const noexcept { + return _capacity; +} +simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_inline size_t parser::max_depth() const noexcept { + return _max_depth; +} + +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; + } +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { + uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { + uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H +/* end file simdjson/generic/ondemand/parser-inl.h for westmere 
*/ +/* including simdjson/generic/ondemand/raw_json_string-inl.h for westmere: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +namespace westmere { +namespace ondemand { + +simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} + +simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } + + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;pos < target.size() && target[pos] != '\\';pos++) {} + // slow path may begin. + bool escaping{false}; + for(;pos < target.size();pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! 
+ for(;target[pos] && target[pos] != '\\';pos++) {} + // slow path may begin. + bool escaping{false}; + for(;target[pos];pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { + // If we are going to call memcmp, then we must know something about the length of the raw_json_string. + return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); +} + +simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + if(target.size() <= SIMDJSON_PADDING) { + return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + } + const char * r{raw()}; + size_t pos{0}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. 
+ return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { + // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. 
+ return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { + return a.unsafe_is_equal(c); +} + +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { + return a == c; +} + +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { + return !(a == c); +} + +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { + return !(a == c); +} + + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { + return iter.unescape(*this, allow_replacement); +} + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { + return iter.unescape_wobbly(*this); +} + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { + bool in_escape = false; + const char *s = str.raw(); + while (true) { + switch (*s) { + case '\\': in_escape = !in_escape; break; + case '"': if (in_escape) { in_escape = false; } else { return out; } break; + default: if (in_escape) { in_escape = false; } + } + out << *s; + s++; + } +} + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { + if (error()) { return error(); } + return 
first.raw(); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(westmere::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { + if (error()) { return error(); } + return first.unescape(iter, allow_replacement); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(westmere::ondemand::json_iterator &iter) const noexcept { + if (error()) { return error(); } + return first.unescape_wobbly(iter); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H +/* end file simdjson/generic/ondemand/raw_json_string-inl.h for westmere */ +/* including simdjson/generic/ondemand/serialization-inl.h for westmere: #include "simdjson/generic/ondemand/serialization-inl.h" */ +/* begin file simdjson/generic/ondemand/serialization-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +inline std::string_view trim(const std::string_view str) noexcept { + // We can almost surely do better by rolling our own find_first_not_of function. 
+ size_t first = str.find_first_not_of(" \t\n\r"); + // If we have the empty string (just white space), then no trimming is possible, and + // we return the empty string_view. + if (std::string_view::npos == first) { return std::string_view(); } + size_t last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, (last - first + 1)); +} + + +inline simdjson_result to_json_string(westmere::ondemand::document& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(westmere::ondemand::document_reference& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(westmere::ondemand::value& x) noexcept { + /** + * If we somehow receive a value that has already been consumed, + * then the following code could be in trouble. E.g., we create + * an array as needed, but if an array was already created, then + * it could be bad. 
+ */ + using namespace westmere::ondemand; + westmere::ondemand::json_type t; + auto error = x.type().get(t); + if(error != SUCCESS) { return error; } + switch (t) + { + case json_type::array: + { + westmere::ondemand::array array; + error = x.get_array().get(array); + if(error) { return error; } + return to_json_string(array); + } + case json_type::object: + { + westmere::ondemand::object object; + error = x.get_object().get(object); + if(error) { return error; } + return to_json_string(object); + } + default: + return trim(x.raw_json_token()); + } +} + +inline simdjson_result to_json_string(westmere::ondemand::object& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(westmere::ondemand::array& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} +} // namespace simdjson + +namespace simdjson { namespace westmere { namespace ondemand { + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + 
return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document_reference& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, 
simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif +}}} // namespace simdjson::westmere::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +/* end file simdjson/generic/ondemand/serialization-inl.h for westmere */ +/* including simdjson/generic/ondemand/token_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/token_iterator-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define 
SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +simdjson_inline token_iterator::token_iterator( + const uint8_t *_buf, + token_position position +) noexcept : buf{_buf}, _position{position} +{ +} + +simdjson_inline uint32_t token_iterator::current_offset() const noexcept { + return *(_position); +} + + +simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { + return &buf[*(_position++)]; +} + +simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; +} +simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; +} +simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { + return *(position+1) - *position; +} + +simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { + return *(position+2) - *(position) > *(position+1) - *(position) ? 
+ *(position+1) - *(position) + : *(position+2) - *(position); +} +simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(_position+delta)]; +} +simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(_position+delta); +} +simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + return *(_position+delta+1) - *(_position+delta); +} + +simdjson_inline token_position token_iterator::position() const noexcept { + return _position; +} +simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; +} + +simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return _position == other._position; +} +simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return _position != other._position; +} +simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return _position > other._position; +} +simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return _position >= other._position; +} +simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return _position < other._position; +} +simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return _position <= other._position; +} + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H +/* end file 
simdjson/generic/ondemand/token_iterator-inl.h for westmere */ +/* including simdjson/generic/ondemand/value-inl.h for westmere: #include "simdjson/generic/ondemand/value-inl.h" */ +/* begin file simdjson/generic/ondemand/value-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +simdjson_inline value::value(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} +simdjson_inline value value::start(const value_iterator &iter) noexcept { + return iter; +} +simdjson_inline value value::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline simdjson_result value::get_array() noexcept { + return array::start(iter); +} +simdjson_inline simdjson_result 
value::get_object() noexcept { + return object::start(iter); +} +simdjson_inline simdjson_result value::start_or_resume_object() noexcept { + if (iter.at_start()) { + return get_object(); + } else { + return object::resume(iter); + } +} + +simdjson_inline simdjson_result value::get_raw_json_string() noexcept { + return iter.get_raw_json_string(); +} +simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { + return iter.get_string(allow_replacement); +} +template +simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { + return iter.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result value::get_wobbly_string() noexcept { + return iter.get_wobbly_string(); +} +simdjson_inline simdjson_result value::get_double() noexcept { + return iter.get_double(); +} +simdjson_inline simdjson_result value::get_double_in_string() noexcept { + return iter.get_double_in_string(); +} +simdjson_inline simdjson_result value::get_uint64() noexcept { + return iter.get_uint64(); +} +simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { + return iter.get_uint64_in_string(); +} +simdjson_inline simdjson_result value::get_int64() noexcept { + return iter.get_int64(); +} +simdjson_inline simdjson_result value::get_int64_in_string() noexcept { + return iter.get_int64_in_string(); +} +simdjson_inline simdjson_result value::get_bool() noexcept { + return iter.get_bool(); +} +simdjson_inline simdjson_result value::is_null() noexcept { + return iter.is_null(); +} +template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result 
value::get() noexcept { return get_number(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } + +template simdjson_inline error_code value::get(T &out) noexcept { + return get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline value::operator T() noexcept(false) { + return get(); +} +simdjson_inline value::operator array() noexcept(false) { + return get_array(); +} +simdjson_inline value::operator object() noexcept(false) { + return get_object(); +} +simdjson_inline value::operator uint64_t() noexcept(false) { + return get_uint64(); +} +simdjson_inline value::operator int64_t() noexcept(false) { + return get_int64(); +} +simdjson_inline value::operator double() noexcept(false) { + return get_double(); +} +simdjson_inline value::operator std::string_view() noexcept(false) { + return get_string(false); +} +simdjson_inline value::operator raw_json_string() noexcept(false) { + return get_raw_json_string(); +} +simdjson_inline value::operator bool() noexcept(false) { + return get_bool(); +} +#endif + +simdjson_inline simdjson_result value::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result value::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result value::count_elements() & noexcept { + simdjson_result answer; + auto a = get_array(); + answer = a.count_elements(); + // count_elements leaves you pointing inside the array, at the first element. + // We need to move back so that the user can create a new array (which requires that + // we point at '['). 
+ iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::count_fields() & noexcept { + simdjson_result answer; + auto a = get_object(); + answer = a.count_fields(); + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::at(size_t index) noexcept { + auto a = get_array(); + return a.at(index); +} + +simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result value::find_field(const char *key) noexcept { + return start_or_resume_object().find_field(key); +} + +simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} + +simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result value::operator[](const char *key) noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline simdjson_result value::type() noexcept { + return iter.type(); +} + +simdjson_inline simdjson_result value::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! 
((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline simdjson_result value::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + + +simdjson_inline bool value::is_negative() noexcept { + return iter.is_negative(); +} + +simdjson_inline simdjson_result value::is_integer() noexcept { + return iter.is_integer(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { + return iter.get_number_type(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { + return iter.get_number(); +} + +simdjson_inline std::string_view value::raw_json_token() noexcept { + return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); +} + +simdjson_inline simdjson_result value::raw_json() noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: { + ondemand::array array; + SIMDJSON_TRY(get_array().get(array)); + return array.raw_json(); + } + case json_type::object: { + ondemand::object object; + SIMDJSON_TRY(get_object().get(object)); + return object.raw_json(); + } + default: + return raw_json_token(); + } +} + +simdjson_inline simdjson_result value::current_location() noexcept { + return iter.json_iter().current_location(); +} + +simdjson_inline int32_t value::current_depth() const noexcept{ + return iter.json_iter().depth(); +} + +inline bool is_pointer_well_formed(std::string_view json_pointer) noexcept { + if (simdjson_unlikely(json_pointer.empty())) { // can't be + return false; + } + if (simdjson_unlikely(json_pointer[0] != '/')) { + return false; + } + size_t escape = json_pointer.find('~'); + if (escape == std::string_view::npos) { + return true; + } + if (escape == json_pointer.size() - 1) { + return false; + } + if (json_pointer[escape + 1] != '0' && json_pointer[escape + 1] != '1') { + 
return false; + } + return true; +} + +simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + // a non-empty string can be invalid, or accessing a primitive (issue 2154) + if (is_pointer_well_formed(json_pointer)) { + return NO_SUCH_FIELD; + } + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result value::at_path(std::string_view json_path) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + westmere::ondemand::value &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + if (error()) { return error(); } + 
return {}; +} + +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} + +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { + if (error()) { return error(); } + return first[key]; +} + +simdjson_inline simdjson_result simdjson_result::get_array() noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + 
return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return first.get(); +} +template simdjson_inline error_code simdjson_result::get(T &out) noexcept { + if (error()) { return error(); } + return first.get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return std::move(first); +} +template<> simdjson_inline error_code simdjson_result::get(westmere::ondemand::value &out) noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return 
error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return static_cast(first); +} +simdjson_inline simdjson_result::operator westmere::ondemand::array() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator westmere::ondemand::object() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator westmere::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} 
+simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer( + std::string_view json_pointer) noexcept { + if (error()) { + return error(); + } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H +/* end file simdjson/generic/ondemand/value-inl.h for westmere */ +/* including simdjson/generic/ondemand/value_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/value_iterator-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped 
(editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +simdjson_inline value_iterator::value_iterator( + json_iterator *json_iter, + depth_t depth, + token_position start_position +) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} +{ +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_root_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { + assert_at_container_start(); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + if (*_json_iter->peek() == '}') { + logger::log_value(*_json_iter, "empty object"); + _json_iter->return_current_and_advance(); + end_container(); + return false; + } + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! 
_json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should + // call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != '}') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); + } + // If the last character is } *and* the first gibberish character is also '}' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed object. 
+ return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { + auto error = check_root_object(); + if(error) { return error; } + return started_object(); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { +#if SIMDJSON_CHECK_EOF + if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } + // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + _json_iter->ascend_to(depth()-1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { + assert_at_next(); + + // It's illegal to call this unless there are more tokens: anything that ends in } or ] is + // obligated to verify there are more tokens if they are not the top level. + switch (*_json_iter->return_current_and_advance()) { + case '}': + logger::log_end_value(*_json_iter, "object"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between object fields"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { + error_code error; + bool has_value; + // + // Initially, the object can be in one of a few different places: + // + // 1. The start of the object, at the first field: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + if (at_first_field()) { + has_value = true; + + // + // 2. 
When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + return false; + + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + if ((error = skip_child() )) { abandon(); return error; } + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + while (has_value) { + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. 
+ if ((error = field_value() )) { abandon(); return error; } + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + //if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); // Skip the value entirely + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + + // If the loop ended, we're out of fields to look at. 
+ return false; +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { + /** + * When find_field_unordered_raw is called, we can either be pointing at the + * first key, pointing outside (at the closing brace) or if a key was matched + * we can be either pointing right afterthe ':' right before the value (that we need skip), + * or we may have consumed the value and we might be at a comma or at the + * final brace (ready for a call to has_next_field()). + */ + error_code error; + bool has_value; + + // First, we scan from that point to the end. + // If we don't find a match, we may loop back around, and scan from the beginning to that point. + token_position search_start = _json_iter->position(); + + // We want to know whether we need to go back to the beginning. + bool at_first = at_first_field(); + /////////////// + // Initially, the object can be in one of a few different places: + // + // 1. At the first key: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + // + if (at_first) { + has_value = true; + + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { + +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + SIMDJSON_TRY(reset_object().get(has_value)); + at_first = true; + // 3. 
When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + // If someone queried a key but they not did access the value, then we are left pointing + // at the ':' and we need to move forward through the value... If the value was + // processed then skip_child() does not move the iterator (but may adjust the depth). + if ((error = skip_child() )) { abandon(); return error; } + search_start = _json_iter->position(); + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + + // After initial processing, we will be in one of two states: + // + // ``` + // // At the beginning of a field + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // At the end of the object + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // ``` + // + // Next, we find a match starting from the current position. + while (has_value) { + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). 
+ if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + // Performance note: it maybe wasteful to rewind to the beginning when there might be + // no other query following. Indeed, it would require reskipping the whole object. + // Instead, you can just stay where you are. If there is a new query, there is always time + // to rewind. + if(at_first) { return false; } + + // If we reach the end without finding a match, search the rest of the fields starting at the + // beginning of the object. 
+ // (We have already run through the object before, so we've already validated its structure. We + // don't check errors in this bit.) + SIMDJSON_TRY(reset_object().get(has_value)); + while (true) { + SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + error = field_value(); SIMDJSON_ASSUME(!error); + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). 
+ SIMDJSON_TRY( skip_child() ); + // If we reached the end of the key-value pair we started from, then we know + // that the key is not there so we return false. We are either right before + // the next comma or the final brace. + if(_json_iter->position() == search_start) { return false; } + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); + // If we make the mistake of exiting here, then we could be left pointing at a key + // in the middle of an object. That's not an allowable state. + } + // If the loop ended, we're out of fields to look at. The program should + // never reach this point. + return false; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { + assert_at_next(); + + const uint8_t *key = _json_iter->return_current_and_advance(); + if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } + return raw_json_string(key); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { + assert_at_next(); + + if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } + _json_iter->descend_to(depth()+1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_root_array(); +} + +inline std::string value_iterator::to_string() const noexcept { + auto answer = 
std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); + if(_json_iter != nullptr) { answer += _json_iter->to_string(); } + answer += std::string(" ]"); + return answer; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { + assert_at_container_start(); + if (*_json_iter->peek() == ']') { + logger::log_value(*_json_iter, "empty array"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY( end_container() ); + return false; + } + _json_iter->descend_to(depth()+1); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should + // also call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != ']') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); + } + // If the last character is ] *and* the first gibberish character is also ']' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. 
+ if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed array. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { + auto error = check_root_array(); + if (error) { return error; } + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { + assert_at_next(); + + logger::log_event(*this, "has_next_element"); + switch (*_json_iter->return_current_and_advance()) { + case ']': + logger::log_end_value(*_json_iter, "array"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + _json_iter->descend_to(depth()+1); + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between array elements"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { + auto not_true = atomparsing::str4ncmp(json, "true"); + auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); + bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 
5 : 4]); + if (error) { return incorrect_type_error("Not a boolean"); } + return simdjson_result(!not_true); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { + bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); + // if we start with 'n', we must be a null + if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } + return is_null_string; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { + return get_raw_json_string().unescape(json_iter(), allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_string(allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { + return get_raw_json_string().unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { + auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { + auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return 
result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { + auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { + auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { + auto result = numberparsing::parse_double(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { + auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { + auto result = parse_bool(peek_non_root_scalar("bool")); + if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } + return result; +} +simdjson_inline simdjson_result value_iterator::is_null() noexcept { + bool is_null_value; + SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); + if(is_null_value) { advance_non_root_scalar("null"); } + return is_null_value; +} +simdjson_inline bool value_iterator::is_negative() noexcept { + return numberparsing::is_negative(peek_non_root_scalar("numbersign")); +} +simdjson_inline bool value_iterator::is_root_negative() noexcept { + return numberparsing::is_negative(peek_root_scalar("numbersign")); +} +simdjson_inline simdjson_result value_iterator::is_integer() noexcept { + return 
numberparsing::is_integer(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { + return numberparsing::get_number_type(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number() noexcept { + number num; + error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); + if(error) { return error; } + return num; +} + +simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("is_root_integer"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + return false; // if there are more than 20 characters, it cannot be represented as an integer. + } + auto answer = numberparsing::is_integer(tmpbuf); + // If the parsing was a success, we must still check that it is + // a single scalar. Note that we parse first because of cases like '[]' where + // getting TRAILING_CONTENT is wrong. + if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } + return answer; +} + +simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return number_type::big_integer; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; + } + auto answer = numberparsing::get_number_type(tmpbuf); + if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + return answer; +} +simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return BIGINT_ERROR; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; + } + number num; + error_code error = numberparsing::parse_number(tmpbuf, num); + if(error) { return error; } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("number"); + return num; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { + return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_root_string(check_trailing, allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { + return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) 
noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("bool"); + uint8_t tmpbuf[5+1+1]; // +1 for null termination + tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } + auto result = parse_bool(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + } + return result; +} +simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("null"); + bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + if(result) { // we have something that looks like a null. + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("null"); + } + return result; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); + + return _json_iter->skip_child(depth()); +} + +simdjson_inline value_iterator value_iterator::child() const noexcept { + assert_at_child(); + return { _json_iter, depth()+1, _json_iter->token.position() }; +} + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is +// marked non-inline. 
+SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline bool value_iterator::is_open() const noexcept { + return _json_iter->depth() >= depth(); +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool value_iterator::at_end() const noexcept { + return _json_iter->at_end(); +} + +simdjson_inline bool value_iterator::at_start() const noexcept { + return _json_iter->token.position() == start_position(); +} + +simdjson_inline bool value_iterator::at_first_field() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + return _json_iter->token.position() == start_position() + 1; +} + +simdjson_inline void value_iterator::abandon() noexcept { + _json_iter->abandon(); +} + +simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { + return _depth; +} +simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { + return _json_iter->error; +} +simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { + return _json_iter->string_buf_loc(); +} +simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { + return *_json_iter; +} +simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { + return *_json_iter; +} + +simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { + return _json_iter->peek(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { + return _json_iter->peek_length(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { + return _json_iter->peek_root_length(start_position()); +} + +simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. 
+ if (!is_at_start()) { return peek_start(); } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return; } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { + logger::log_start_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + const uint8_t *json; + if (!is_at_start()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + json = peek_start(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + } else { + assert_at_start(); + /** + * We should be prudent. Let us peek. If it is not the right type, we + * return an error. Only once we have determined that we have the right + * type are we allowed to advance! 
+ */ + json = _json_iter->peek(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + _json_iter->return_current_and_advance(); + } + + + return SUCCESS; +} + + +simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_root(); + return _json_iter->peek(); +} +simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_non_root_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_root(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} +simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_non_root_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { + logger::log_error(*_json_iter, start_position(), depth(), message); + return INCORRECT_TYPE; +} + +simdjson_inline bool value_iterator::is_at_start() const noexcept { + return position() == start_position(); +} + +simdjson_inline bool value_iterator::is_at_key() const noexcept { + // Keys are at the same depth as the object. + // Note here that we could be safer and check that we are within an object, + // but we do not. 
+ return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; +} + +simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { + // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). + auto delta = position() - start_position(); + return delta == 1 || delta == 2; +} + +inline void value_iterator::assert_at_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_container_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_next() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +simdjson_inline void value_iterator::move_at_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position); +} + +simdjson_inline void value_iterator::move_at_container_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position + 1); +} + +simdjson_inline simdjson_result value_iterator::reset_array() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_array(); +} + +simdjson_inline simdjson_result value_iterator::reset_object() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_object(); +} + +inline void value_iterator::assert_at_child() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_root() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( 
_depth == 1 ); +} + +inline void value_iterator::assert_at_non_root_start() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth > 1 ); +} + +inline void value_iterator::assert_is_valid() const noexcept { + SIMDJSON_ASSUME( _json_iter != nullptr ); +} + +simdjson_inline bool value_iterator::is_valid() const noexcept { + return _json_iter != nullptr; +} + +simdjson_inline simdjson_result value_iterator::type() const noexcept { + switch (*peek_start()) { + case '{': + return json_type::object; + case '[': + return json_type::array; + case '"': + return json_type::string; + case 'n': + return json_type::null; + case 't': case 'f': + return json_type::boolean; + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return json_type::number; + default: + return TAPE_ERROR; + } +} + +simdjson_inline token_position value_iterator::start_position() const noexcept { + return _start_position; +} + +simdjson_inline token_position value_iterator::position() const noexcept { + return _json_iter->position(); +} + +simdjson_inline token_position value_iterator::end_position() const noexcept { + return _json_iter->end_position(); +} + +simdjson_inline token_position value_iterator::last_position() const noexcept { + return _json_iter->last_position(); +} + +simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { + return _json_iter->report_error(error, message); +} + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +/* end file 
simdjson/generic/ondemand/value_iterator-inl.h for westmere */ +/* end file simdjson/generic/ondemand/amalgamated.h for westmere */ +/* including simdjson/westmere/end.h: #include "simdjson/westmere/end.h" */ +/* begin file simdjson/westmere/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +SIMDJSON_UNTARGET_REGION +#endif + +/* undefining SIMDJSON_IMPLEMENTATION from "westmere" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/westmere/end.h */ + +#endif // SIMDJSON_WESTMERE_IMPLEMENTATION_H +/* end file simdjson/westmere/ondemand.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(lsx) +/* including simdjson/lsx/ondemand.h: #include "simdjson/lsx/ondemand.h" */ +/* begin file simdjson/lsx/ondemand.h */ +#ifndef SIMDJSON_LSX_ONDEMAND_H +#define SIMDJSON_LSX_ONDEMAND_H + +/* including simdjson/lsx/begin.h: #include "simdjson/lsx/begin.h" */ +/* begin file simdjson/lsx/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "lsx" */ +#define SIMDJSON_IMPLEMENTATION lsx +/* including simdjson/lsx/base.h: #include "simdjson/lsx/base.h" */ +/* begin file simdjson/lsx/base.h */ +#ifndef SIMDJSON_LSX_BASE_H +#define SIMDJSON_LSX_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Implementation for LSX. 
+ */ +namespace lsx { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_BASE_H +/* end file simdjson/lsx/base.h */ +/* including simdjson/lsx/intrinsics.h: #include "simdjson/lsx/intrinsics.h" */ +/* begin file simdjson/lsx/intrinsics.h */ +#ifndef SIMDJSON_LSX_INTRINSICS_H +#define SIMDJSON_LSX_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for LoongArch SX"); + +#endif // SIMDJSON_LSX_INTRINSICS_H +/* end file simdjson/lsx/intrinsics.h */ +/* including simdjson/lsx/bitmanipulation.h: #include "simdjson/lsx/bitmanipulation.h" */ +/* begin file simdjson/lsx/bitmanipulation.h */ +#ifndef SIMDJSON_LSX_BITMANIPULATION_H +#define SIMDJSON_LSX_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/bitmask.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. 
+// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { + return __builtin_ctzll(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return __builtin_clzll(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int count_ones(uint64_t input_num) { + return __lsx_vpickve2gr_w(__lsx_vpcnt_d(__m128i(v2u64{input_num, 0})), 0); +} + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +} + +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_BITMANIPULATION_H +/* end file simdjson/lsx/bitmanipulation.h */ +/* including simdjson/lsx/bitmask.h: #include "simdjson/lsx/bitmask.h" */ +/* begin file simdjson/lsx/bitmask.h */ +#ifndef SIMDJSON_LSX_BITMASK_H +#define SIMDJSON_LSX_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. 
+// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif +/* end file simdjson/lsx/bitmask.h */ +/* including simdjson/lsx/numberparsing_defs.h: #include "simdjson/lsx/numberparsing_defs.h" */ +/* begin file simdjson/lsx/numberparsing_defs.h */ +#ifndef SIMDJSON_LSX_NUMBERPARSING_DEFS_H +#define SIMDJSON_LSX_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace lsx { +namespace numberparsing { + +// we don't have appropriate instructions, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); + return answer; +} + +} // namespace numberparsing +} // namespace lsx +} // namespace simdjson + +#define 
SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_LSX_NUMBERPARSING_DEFS_H +/* end file simdjson/lsx/numberparsing_defs.h */ +/* including simdjson/lsx/simd.h: #include "simdjson/lsx/simd.h" */ +/* begin file simdjson/lsx/simd.h */ +#ifndef SIMDJSON_LSX_SIMD_H +#define SIMDJSON_LSX_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + simdjson_inline operator const v16i8&() const { return (v16i8&)this->value; } + simdjson_inline operator v16i8&() { return (v16i8&)this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return __lsx_vor_v(*this, other); } + simdjson_inline Child operator&(const Child other) const { return __lsx_vand_v(*this, other); } + simdjson_inline Child operator^(const Child other) const { return __lsx_vxor_v(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return __lsx_vandn_v(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { 
auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. + template + struct simd8; + + template> + struct base8: base> { + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return __lsx_vseq_b(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return __lsx_vor_v(__lsx_vbsll_v(*this, N), __lsx_vbsrl_v(prev_chunk, 16 - N)); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { + return __lsx_vreplgr2vr_b(uint8_t(-(!!_value))); + } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return __lsx_vpickve2gr_w(__lsx_vmskltz_b(*this), 0); } + simdjson_inline bool any() const { return 0 == __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return __lsx_vreplgr2vr_b(_value); } + static simdjson_inline simd8 zero() { return __lsx_vldi(0); } + static simdjson_inline simd8 load(const T values[16]) { + return __lsx_vld(reinterpret_cast(values), 0); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T 
v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { + return __lsx_vst(*this, reinterpret_cast<__m128i *>(dst), 0); + } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return __lsx_vadd_b(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return __lsx_vsub_b(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return __lsx_vshuf_b(lookup_table, lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by haswell + // lsx do it in 2 steps, first 8 bytes and then second 8 bytes... 
+ uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register. + __m128i shufmask = {int64_t(thintable_epi8[mask1]), int64_t(thintable_epi8[mask2]) + 0x0808080808080808}; + // this is the version "nearly pruned" + __m128i pruned = __lsx_vshuf_b(*this, *this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask + __m128i compactmask = __lsx_vldx(reinterpret_cast(reinterpret_cast(pshufb_combine_table)), pop1 * 8); + __m128i answer = __lsx_vshuf_b(pruned, pruned, compactmask); + __lsx_vst(answer, reinterpret_cast(output), 0); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[16]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8({ + v0, v1, v2, 
v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return __lsx_vmax_b(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lsx_vmin_b(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return __lsx_vslt_b(other, *this); } + simdjson_inline simd8 operator<(const simd8 other) const { return __lsx_vslt_b(*this, other); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(__m128i(v16u8{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + })) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, 
v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return __lsx_vsadd_bu(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return __lsx_vssub_bu(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return __lsx_vmax_bu(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lsx_vmin_bu(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return 0 == __lsx_vpickve2gr_w(__lsx_vmskltz_b(*this), 0); } + simdjson_inline bool bits_not_set_anywhere() const { return 0 == __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool 
bits_not_set_anywhere(simd8 bits) const { + return 0 == __lsx_vpickve2gr_hu(__lsx_vmsknz_b(__lsx_vand_v(*this, bits)), 0); + } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(__lsx_vsrli_b(*this, N)); } + template + simdjson_inline simd8 shl() const { return simd8(__lsx_vslli_b(*this, N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "LSX kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint16_t mask1 = uint16_t(mask); + uint16_t mask2 = uint16_t(mask >> 16); + uint16_t mask3 = uint16_t(mask >> 32); + uint16_t mask4 = uint16_t(mask >> 48); + __m128i zcnt = __lsx_vpcnt_h(__m128i(v2u64{~mask, 0})); + uint64_t zcnt1 = __lsx_vpickve2gr_hu(zcnt, 0); + uint64_t zcnt2 = __lsx_vpickve2gr_hu(zcnt, 1); + uint64_t zcnt3 = __lsx_vpickve2gr_hu(zcnt, 2); + uint64_t zcnt4 = __lsx_vpickve2gr_hu(zcnt, 3); + uint8_t *voutput = reinterpret_cast(output); + // There should be a critical value for which processing in scalar is faster.
+ if (zcnt1) + this->chunks[0].compress(mask1, reinterpret_cast(voutput)); + voutput += zcnt1; + if (zcnt2) + this->chunks[1].compress(mask2, reinterpret_cast(voutput)); + voutput += zcnt2; + if (zcnt3) + this->chunks[2].compress(mask3, reinterpret_cast(voutput)); + voutput += zcnt3; + if (zcnt4) + this->chunks[3].compress(mask4, reinterpret_cast(voutput)); + voutput += zcnt4; + return reinterpret_cast(voutput) - reinterpret_cast(output); + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline uint64_t to_bitmask() const { + __m128i mask1 = __lsx_vmskltz_b(this->chunks[0]); + __m128i mask2 = __lsx_vmskltz_b(this->chunks[1]); + __m128i mask3 = __lsx_vmskltz_b(this->chunks[2]); + __m128i mask4 = __lsx_vmskltz_b(this->chunks[3]); + mask1 = __lsx_vilvl_h(mask2, mask1); + mask2 = __lsx_vilvl_h(mask4, mask3); + return __lsx_vpickve2gr_du(__lsx_vilvl_w(mask2, mask1), 0); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd 
+} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_SIMD_H +/* end file simdjson/lsx/simd.h */ +/* including simdjson/lsx/stringparsing_defs.h: #include "simdjson/lsx/stringparsing_defs.h" */ +/* begin file simdjson/lsx/stringparsing_defs.h */ +#ifndef SIMDJSON_LSX_STRINGPARSING_DEFS_H +#define SIMDJSON_LSX_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on LSX; 
therefore, we + // smash them together into a 64-byte mask and get the bitmask from there. + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_STRINGPARSING_DEFS_H +/* end file simdjson/lsx/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/lsx/begin.h */ +/* including simdjson/generic/ondemand/amalgamated.h for lsx: #include "simdjson/generic/ondemand/amalgamated.h" */ +/* begin file simdjson/generic/ondemand/amalgamated.h for lsx */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H) +#error simdjson/generic/ondemand/dependencies.h must be included before simdjson/generic/ondemand/amalgamated.h! +#endif + +// Stuff other things depend on +/* including simdjson/generic/ondemand/base.h for lsx: #include "simdjson/generic/ondemand/base.h" */ +/* begin file simdjson/generic/ondemand/base.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +/** + * A fast, simple, DOM-like interface that parses JSON as you use it. + * + * Designed for maximum speed and a lower memory profile. + */ +namespace ondemand { + +/** Represents the depth of a JSON value (number of nested arrays/objects). 
*/ +using depth_t = int32_t; + +/** @copydoc simdjson::lsx::number_type */ +using number_type = simdjson::lsx::number_type; + +/** @private Position in the JSON buffer indexes */ +using token_position = const uint32_t *; + +class array; +class array_iterator; +class document; +class document_reference; +class document_stream; +class field; +class json_iterator; +enum class json_type; +struct number; +class object; +class object_iterator; +class parser; +class raw_json_string; +class token_iterator; +class value; +class value_iterator; + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_BASE_H +/* end file simdjson/generic/ondemand/base.h for lsx */ +/* including simdjson/generic/ondemand/value_iterator.h for lsx: #include "simdjson/generic/ondemand/value_iterator.h" */ +/* begin file simdjson/generic/ondemand/value_iterator.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace ondemand { + +/** + * Iterates through a single JSON value at a particular depth. + * + * Does not keep track of the type of value: provides methods for objects, arrays and scalars and expects + * the caller to call the right ones. + * + * @private This is not intended for external use. 
+ */ +class value_iterator { +protected: + /** The underlying JSON iterator */ + json_iterator *_json_iter{}; + /** The depth of this value */ + depth_t _depth{}; + /** + * The starting token index for this value + */ + token_position _start_position{}; + +public: + simdjson_inline value_iterator() noexcept = default; + + /** + * Denote that we're starting a document. + */ + simdjson_inline void start_document() noexcept; + + /** + * Skips a non-iterated or partially-iterated JSON value, whether it is a scalar, array or object. + * + * Optimized for scalars. + */ + simdjson_warn_unused simdjson_inline error_code skip_child() noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is at the start of the value + */ + simdjson_inline bool at_start() const noexcept; + + /** + * Tell whether the value is open--if the value has not been used, or the array/object is still open. + */ + simdjson_inline bool is_open() const noexcept; + + /** + * Tell whether the value is at an object's first field (just after the {). + */ + simdjson_inline bool at_first_field() const noexcept; + + /** + * Abandon all iteration. + */ + simdjson_inline void abandon() noexcept; + + /** + * Get the child value as a value_iterator. + */ + simdjson_inline value_iterator child_value() const noexcept; + + /** + * Get the depth of this value. + */ + simdjson_inline int32_t depth() const noexcept; + + /** + * Get the JSON type of this value. + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() const noexcept; + + /** + * @addtogroup object Object iteration + * + * Methods to iterate and find object fields. These methods generally *assume* the value is + * actually an object; the caller is responsible for keeping track of that fact. + * + * @{ + */ + + /** + * Start an object iteration. 
+ * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + */ + simdjson_warn_unused simdjson_inline simdjson_result start_object() noexcept; + /** + * Start an object iteration from the root. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_object() noexcept; + /** + * Checks whether an object could be started from the root. May be called by start_root_object. + * + * @returns SUCCESS if it is possible to safely start an object from the root (document level). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_object() noexcept; + /** + * Start an object iteration after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; + /** + * Start an object iteration from the root, after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; + + /** + * Moves to the next field in an object. + * + * Looks for , and }. 
If } is found, the object is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return whether there is another field in the object. + * @error TAPE_ERROR If there is a comma missing between fields. + * @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_field() noexcept; + + /** + * Get the current field's key. + */ + simdjson_warn_unused simdjson_inline simdjson_result field_key() noexcept; + + /** + * Pass the : in the field and move to its value. + */ + simdjson_warn_unused simdjson_inline error_code field_value() noexcept; + + /** + * Find the next field with the given key. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline error_code find_field(const std::string_view key) noexcept; + + /** + * Find the next field with the given key, *without* unescaping. This assumes object order: it + * will not find the field if it was already passed when looking for some *other* field. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). 
+ */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_raw(const std::string_view key) noexcept; + + /** + * Find the field with the given key without regard to order, and *without* unescaping. + * + * This is an unordered object lookup: if the field is not found initially, it will cycle around and scan from the beginning. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept; + + /** @} */ + + /** + * @addtogroup array Array iteration + * Methods to iterate over array elements. These methods generally *assume* the value is actually + * an array; the caller is responsible for keeping track of that fact. + * @{ + */ + + /** + * Check for an opening [ and start an array iteration. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + */ + simdjson_warn_unused simdjson_inline simdjson_result start_array() noexcept; + /** + * Check for an opening [ and start an array iteration while at the root. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_array() noexcept; + /** + * Checks whether an array could be started from the root. May be called by start_root_array. + * + * @returns SUCCESS if it is possible to safely start an array from the root (document level).
+ * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_array() noexcept; + /** + * Start an array iteration, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; + /** + * Start an array iteration from the root, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; + + /** + * Moves to the next element in an array. + * + * Looks for , and ]. If ] is found, the array is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return Whether there is another element in the array. + * @error TAPE_ERROR If there is a comma missing between elements. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_element() noexcept; + + /** + * Get a child value iterator. 
+ */ + simdjson_warn_unused simdjson_inline value_iterator child() const noexcept; + + /** @} */ + + /** + * @defgroup scalar Scalar values + * @addtogroup scalar + * @{ + */ + + simdjson_warn_unused simdjson_inline simdjson_result get_string(bool allow_replacement) noexcept; + template + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_bool() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_null() noexcept; + simdjson_warn_unused simdjson_inline bool is_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_integer() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + simdjson_warn_unused simdjson_inline simdjson_result get_root_string(bool check_trailing, bool allow_replacement) noexcept; + template + simdjson_warn_unused simdjson_inline error_code get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_wobbly_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result 
get_root_raw_json_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_bool(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline bool is_root_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_integer(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number_type(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; + + simdjson_inline error_code error() const noexcept; + simdjson_inline uint8_t *&string_buf_loc() noexcept; + simdjson_inline const json_iterator &json_iter() const noexcept; + simdjson_inline json_iterator &json_iter() noexcept; + + simdjson_inline void assert_is_valid() const noexcept; + simdjson_inline bool is_valid() const noexcept; + + /** @} */ +protected: + /** + * Restarts an array iteration. + * @returns Whether the array has any elements (returns false for empty). + */ + simdjson_inline simdjson_result reset_array() noexcept; + /** + * Restarts an object iteration. + * @returns Whether the object has any fields (returns false for empty). 
+ */ + simdjson_inline simdjson_result reset_object() noexcept; + /** + * move_at_start(): moves us so that we are pointing at the beginning of + * the container. It updates the index so that at_start() is true and it + * syncs the depth. The user can then create a new container instance. + * + * Usage: used with value::count_elements(). + **/ + simdjson_inline void move_at_start() noexcept; + + /** + * move_at_container_start(): moves us so that we are pointing at the beginning of + * the container so that assert_at_container_start() passes. + * + * Usage: used with reset_array() and reset_object(). + **/ + simdjson_inline void move_at_container_start() noexcept; + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + simdjson_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept; + + simdjson_inline simdjson_result parse_null(const uint8_t *json) const noexcept; + simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; + simdjson_inline const uint8_t *peek_start() const noexcept; + simdjson_inline uint32_t peek_start_length() const noexcept; + simdjson_inline uint32_t peek_root_length() const noexcept; + + /** + * The general idea of the advance_... methods and the peek_* methods + * is that you first peek and check that you have desired type. If you do, + * and only if you do, then you advance. + * + * We used to unconditionally advance. But this made reasoning about our + * current state difficult. + * Suppose you always advance. Look at the 'value' matching the key + * "shadowable" in the following example... + * + * ({"globals":{"a":{"shadowable":[}}}}) + * + * If the user thinks it is a Boolean and asks for it, then we check the '[', + * decide it is not a Boolean, but still move into the next character ('}'). Now + * we are left pointing at '}' right after a '['. And we have not yet reported + * an error, only that we do not have a Boolean. 
+ * + * If, instead, you just stand your ground until it is content that you know, then + * you will only even move beyond the '[' if the user tells you that you have an + * array. So you will be at the '}' character inside the array and, hopefully, you + * will then catch the error because an array cannot start with '}', but the code + * processing Boolean values does not know this. + * + * So the contract is: first call 'peek_...' and then call 'advance_...' only + * if you have determined that it is a type you can handle. + * + * Unfortunately, it makes the code more verbose, longer and maybe more error prone. + */ + + simdjson_inline void advance_scalar(const char *type) noexcept; + simdjson_inline void advance_root_scalar(const char *type) noexcept; + simdjson_inline void advance_non_root_scalar(const char *type) noexcept; + + simdjson_inline const uint8_t *peek_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_root_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; + + + simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; + simdjson_inline error_code end_container() noexcept; + + /** + * Advance to a place expecting a value (increasing depth). + * + * @return The current token (the one left behind). + * @error TAPE_ERROR If the document ended early. + */ + simdjson_inline simdjson_result advance_to_value() noexcept; + + simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; + simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; + + simdjson_inline bool is_at_start() const noexcept; + /** + * is_at_iterator_start() returns true on an array or object after it has just been + * created, whether the instance is empty or not. 
+ * + * Usage: used by array::begin() in debug mode (SIMDJSON_DEVELOPMENT_CHECKS) + */ + simdjson_inline bool is_at_iterator_start() const noexcept; + + /** + * Assuming that we are within an object, this returns true if we + * are pointing at a key. + * + * Usage: the skip_child() method should never be used while we are pointing + * at a key inside an object. + */ + simdjson_inline bool is_at_key() const noexcept; + + inline void assert_at_start() const noexcept; + inline void assert_at_container_start() const noexcept; + inline void assert_at_root() const noexcept; + inline void assert_at_child() const noexcept; + inline void assert_at_next() const noexcept; + inline void assert_at_non_root_start() const noexcept; + + /** Get the starting position of this value */ + simdjson_inline token_position start_position() const noexcept; + + /** @copydoc error_code json_iterator::position() const noexcept; */ + simdjson_inline token_position position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position last_position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position end_position() const noexcept; + /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + friend class document; + friend class object; + friend class array; + friend class value; + friend class field; +}; // value_iterator + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public lsx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lsx::ondemand::value_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() 
noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H +/* end file simdjson/generic/ondemand/value_iterator.h for lsx */ +/* including simdjson/generic/ondemand/value.h for lsx: #include "simdjson/generic/ondemand/value.h" */ +/* begin file simdjson/generic/ondemand/value.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace ondemand { + +/** + * An ephemeral JSON value returned during iteration. It is only valid for as long as you do + * not access more data in the JSON document. + */ +class value { +public: + /** + * Create a new invalid value. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline value() noexcept = default; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_inline simdjson_result get() noexcept { + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. 
+ static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_inline error_code get(T &out) noexcept; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() noexcept; + + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() noexcept; + + /** + * Cast this JSON value to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + + /** + * Cast this JSON value (inside string) to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer.
+ */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64() noexcept; + + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + + /** + * Attempts to fill the provided std::string reference with the parsed value of the current string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. 
+ * We recommend you avoid allocating an std::string unless you need to. + * + * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. + */ + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + + /** + * Cast this JSON value to a "wobbly" string. + * + * The string is may not be a valid UTF-8 string. + * See https://simonsapin.github.io/wtf-8/ + * + * Important: a value should be consumed once. Calling get_wobbly_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_inline simdjson_result get_bool() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null. + * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. 
The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.). + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() noexcept(false); + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. 
+ * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); +#endif + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + * + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * Performance hint: You should only call count_elements() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. 
+ * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method on the object instance. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. 
+ * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field as not there when they are not in order). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
+ */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @return The type of JSON value (json_type::array, json_type::object, json_type::string, + * json_type::number, json_type::boolean, or json_type::null). + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the value is a scalar (string, number, null, Boolean). + * Returns false when there it is an array or object. + * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + /** + * Checks whether the value is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; + + /** + * Checks whether the value is a negative number. 
+ * + * @returns true if the number if negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the value is an integer number. Note that + * this requires to partially parse the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * Performance note: if you call this function systematically + * before parsing a number, you may have fallen for a performance + * anti-pattern. + * + * @returns true if the number if negative. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808. + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808. + * get_number_type() is number_type::big_integer for integers that do not fit in 64 bits, + * in which case the digit_count is set to the length of the big integer string. + * Otherwise, get_number_type() has value number_type::floating_point_number. + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it is does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number. 
An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * For integers that do not fit in 64 bits, the function returns BIGINT_ERROR error code. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + * + * Performance note: this is designed with performance in mind. When + * calling 'get_number()', you scan the number string only once, determining + * efficiently the type and storing it in an efficient manner. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. 
+ * + * The string_view is *not* null-terminated. However, if this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view is guaranteed to be + * a non-space token. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". + * - -1.2e-100 + * - true + * - false + * - null + * + * See also value::raw_json(). + */ + simdjson_inline std::string_view raw_json_token() noexcept; + + /** + * Get a string_view pointing at this value in the JSON document. + * If this element is an array or an object, it consumes the array or the object + * and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document. + * If this element is a scalar (string, number, Boolean, null), it returns what + * raw_json_token() would return. + */ + simdjson_inline simdjson_result raw_json() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + simdjson_inline simdjson_result current_location() noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. 
+ * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. + * + * Calling at_pointer() on non-document instances (e.g., arrays and objects) is not + * standardized (by RFC 6901). We provide some experimental support for JSON pointers + * on non-document instances. Yet it is not the case when calling at_pointer on an array + * or an object instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. 
+ * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view at_path) noexcept; + + +protected: + /** + * Create a value. + */ + simdjson_inline value(const value_iterator &iter) noexcept; + + /** + * Skip this value, allowing iteration to continue. + */ + simdjson_inline void skip() noexcept; + + /** + * Start a value at the current position. + * + * (It should already be started; this is just a self-documentation method.) + */ + static simdjson_inline value start(const value_iterator &iter) noexcept; + + /** + * Resume a value. 
+ */ + static simdjson_inline value resume(const value_iterator &iter) noexcept; + + /** + * Get the object, starting or resuming it as necessary + */ + simdjson_inline simdjson_result start_or_resume_object() noexcept; + + // simdjson_inline void log_value(const char *type) const noexcept; + // simdjson_inline void log_error(const char *message) const noexcept; + + value_iterator iter{}; + + friend class document; + friend class array_iterator; + friend class field; + friend class object; + friend struct simdjson_result; + friend struct simdjson_result; + friend class field; +}; + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public lsx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lsx::ondemand::value &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result get_array() noexcept; + simdjson_inline simdjson_result get_object() noexcept; + + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() noexcept; + + 
template simdjson_inline error_code get(T &out) noexcept; + +#if SIMDJSON_EXCEPTIONS + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator lsx::ondemand::array() noexcept(false); + simdjson_inline operator lsx::ondemand::object() noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator lsx::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object.
+ */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field was not there when they are not in order). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object.
+ */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + */ + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + + /** @copydoc simdjson_inline std::string_view value::raw_json_token() noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + + /** @copydoc simdjson_inline simdjson_result current_location() noexcept */ + simdjson_inline simdjson_result current_location() noexcept; + /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + +} // namespace simdjson + +#endif
// SIMDJSON_GENERIC_ONDEMAND_VALUE_H +/* end file simdjson/generic/ondemand/value.h for lsx */ +/* including simdjson/generic/ondemand/logger.h for lsx: #include "simdjson/generic/ondemand/logger.h" */ +/* begin file simdjson/generic/ondemand/logger.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace ondemand { + +// Logging should be free unless SIMDJSON_VERBOSE_LOGGING is set. Importantly, it is critical +// that the call to the log functions be side-effect free. Thus, for example, you should not +// create temporary std::string instances. +namespace logger { + +enum class log_level : int32_t { + info = 0, + error = 1 +}; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + +// We do not want these functions to be 'really inlined' since real inlining is +// for performance purposes and if you are using the loggers, you do not care about +// performance (or should not). +static inline void log_headers() noexcept; +// If args are provided, title will be treated as format string +template +static inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +template +static inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... 
args) noexcept; +static inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; + +static inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail="") noexcept; +static inline void log_error(const json_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +static inline void log_event(const value_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const value_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_error(const value_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +} // namespace logger +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_H +/* end file 
simdjson/generic/ondemand/logger.h for lsx */ +/* including simdjson/generic/ondemand/token_iterator.h for lsx: #include "simdjson/generic/ondemand/token_iterator.h" */ +/* begin file simdjson/generic/ondemand/token_iterator.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace ondemand { + +/** + * Iterates through JSON tokens (`{` `}` `[` `]` `,` `:` `""` `123` `true` `false` `null`) + * detected by stage 1. + * + * @private This is not intended for external use. + */ +class token_iterator { +public: + /** + * Create a new invalid token_iterator. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline token_iterator() noexcept = default; + simdjson_inline token_iterator(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator &operator=(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator(const token_iterator &other) noexcept = default; + simdjson_inline token_iterator &operator=(const token_iterator &other) noexcept = default; + + /** + * Advance to the next token (returning the current one). + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + /** + * Reports the current offset in bytes from the start of the underlying buffer. + */ + simdjson_inline uint32_t current_offset() const noexcept; + /** + * Get the JSON text for a given token (relative). 
+ * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. + * + */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a root token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token (start of the document). + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; + /** + * Return the current index. + */ + simdjson_inline token_position position() const noexcept; + /** + * Reset to a previously saved index. + */ + simdjson_inline void set_position(token_position target_position) noexcept; + + // NOTE: we don't support a full C++ iterator interface, because we expect people to make + // different calls to advance the iterator based on *their own* state. 
+ + simdjson_inline bool operator==(const token_iterator &other) const noexcept; + simdjson_inline bool operator!=(const token_iterator &other) const noexcept; + simdjson_inline bool operator>(const token_iterator &other) const noexcept; + simdjson_inline bool operator>=(const token_iterator &other) const noexcept; + simdjson_inline bool operator<(const token_iterator &other) const noexcept; + simdjson_inline bool operator<=(const token_iterator &other) const noexcept; + +protected: + simdjson_inline token_iterator(const uint8_t *buf, token_position position) noexcept; + + /** + * Get the index of the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_index(int32_t delta=0) const noexcept; + /** + * Get the index of the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. + * + */ + simdjson_inline uint32_t peek_index(token_position position) const noexcept; + + const uint8_t *buf{}; + token_position _position{}; + + friend class json_iterator; + friend class value_iterator; + friend class object; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... 
args) noexcept; +}; + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public lsx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lsx::ondemand::token_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H +/* end file simdjson/generic/ondemand/token_iterator.h for lsx */ +/* including simdjson/generic/ondemand/json_iterator.h for lsx: #include "simdjson/generic/ondemand/json_iterator.h" */ +/* begin file simdjson/generic/ondemand/json_iterator.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace ondemand { + +/** + * Iterates through JSON tokens, keeping track of depth and string buffer. + * + * @private This is not intended for external use. + */ +class json_iterator { +protected: + token_iterator token{}; + ondemand::parser *parser{}; + /** + * Next free location in the string buffer. + * + * Used by raw_json_string::unescape() to have a place to unescape strings to. + */ + uint8_t *_string_buf_loc{}; + /** + * JSON error, if there is one. 
+ * + * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. + * + * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first + * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If + * this is not elided, we should make sure it's at least not using up a register. Failing that, + * we should store it in document so there's only one of them. + */ + error_code error{SUCCESS}; + /** + * Depth of the current token in the JSON. + * + * - 0 = finished with document + * - 1 = document root value (could be [ or {, not yet known) + * - 2 = , or } inside root array/object + * - 3 = key or value inside root array/object. + */ + depth_t _depth{}; + /** + * Beginning of the document indexes. + * Normally we have root == parser->implementation->structural_indexes.get() + * but this may differ, especially in streaming mode (where we have several + * documents); + */ + token_position _root{}; + /** + * Normally, a json_iterator operates over a single document, but in + * some cases, we may have a stream of documents. This attribute is meant + * as meta-data: the json_iterator works the same irrespective of the + * value of this attribute. + */ + bool _streaming{false}; + +public: + simdjson_inline json_iterator() noexcept = default; + simdjson_inline json_iterator(json_iterator &&other) noexcept; + simdjson_inline json_iterator &operator=(json_iterator &&other) noexcept; + simdjson_inline explicit json_iterator(const json_iterator &other) noexcept = default; + simdjson_inline json_iterator &operator=(const json_iterator &other) noexcept = default; + /** + * Skips a JSON value, whether it is a scalar, array or object. 
+ */ + simdjson_warn_unused simdjson_inline error_code skip_child(depth_t parent_depth) noexcept; + + /** + * Tell whether the iterator is still at the start + */ + simdjson_inline bool at_root() const noexcept; + + /** + * Tell whether we should be expected to run in streaming + * mode (iterating over many documents). It is pure metadata + * that does not affect how the iterator works. It is used by + * start_root_array() and start_root_object(). + */ + simdjson_inline bool streaming() const noexcept; + + /** + * Get the root value iterator + */ + simdjson_inline token_position root_position() const noexcept; + /** + * Assert that we are at the document depth (== 1) + */ + simdjson_inline void assert_at_document_depth() const noexcept; + /** + * Assert that we are at the root of the document + */ + simdjson_inline void assert_at_root() const noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is live (has not been moved). + */ + simdjson_inline bool is_alive() const noexcept; + + /** + * Abandon this iterator, setting depth to 0 (as if the document is finished). + */ + simdjson_inline void abandon() noexcept; + + /** + * Advance the current token without modifying depth. + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + + /** + * Returns true if there is a single token in the index (i.e., it is + * a JSON with a scalar value such as a single number). + * + * @return whether there is a single token + */ + simdjson_inline bool is_single_token() const noexcept; + + /** + * Assert that there are at least the given number of tokens left. + * + * Has no effect in release builds. + */ + simdjson_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; + /** + * Assert that the given position addresses an actual token (is within bounds). + * + * Has no effect in release builds. 
+ */ + simdjson_inline void assert_valid_position(token_position position) const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + /** + * Get a pointer to the current location in the input buffer. + * + * This is not null-terminated; it is a view into the JSON. + * + * You may be pointing outside of the input buffer: it is not generally + * safe to dereference this pointer. + */ + simdjson_inline const uint8_t *unsafe_pointer() const noexcept; + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token to retrieve. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ... + */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the current root token. 
+ * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; + /** + * Get the JSON text for the last token in the document. + * + * This is not null-terminated; it is a view into the JSON. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ... + */ + simdjson_inline const uint8_t *peek_last() const noexcept; + + /** + * Ascend one level. + * + * Validates that the depth - 1 == parent_depth. + * + * @param parent_depth the expected parent depth. + */ + simdjson_inline void ascend_to(depth_t parent_depth) noexcept; + + /** + * Descend one level. + * + * Validates that the new depth == child_depth. + * + * @param child_depth the expected child depth. + */ + simdjson_inline void descend_to(depth_t child_depth) noexcept; + simdjson_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; + + /** + * Get current depth. + */ + simdjson_inline depth_t depth() const noexcept; + + /** + * Get current (writeable) location in the string buffer. + */ + simdjson_inline uint8_t *&string_buf_loc() noexcept; + + /** + * Report an unrecoverable error, preventing further iteration. + * + * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. + */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + /** + * Log error, but don't stop iteration. + * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. 
+ */ + simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; + + /** + * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with + * N bytes of capacity. It will return false if N is too small (smaller than max_len) or if it is zero. + * The buffer (tmpbuf) is padded with space characters. + */ + simdjson_warn_unused simdjson_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept; + + simdjson_inline token_position position() const noexcept; + /** + * Write the raw_json_string to the string buffer and return a string_view. + * Each raw_json_string should be unescaped once, or else the string buffer might + * overflow. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, bool allow_replacement) noexcept; + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in) noexcept; + + simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; + + simdjson_inline error_code consume_character(char c) noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + simdjson_inline token_position start_position(depth_t depth) const noexcept; + simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; +#endif + + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() const noexcept; + + /** + * Updates this json iterator so that it is back at the beginning of the document, + * as if it had just been created. + */ + inline void rewind() noexcept; + /** + * This checks whether the {,},[,] are balanced so that the document + * ends with proper zero depth. This requires scanning the whole document + * and it may be expensive. It is expected that it will be rarely called.
+ * It does not attempt to match { with } and [ with ]. + */ + inline bool balanced() const noexcept; +protected: + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser, bool streaming) noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + /// The last token before the end + simdjson_inline token_position last_position() const noexcept; + /// The token *at* the end. This points at gibberish and should only be used for comparison. + simdjson_inline token_position end_position() const noexcept; + /// The end of the buffer. + simdjson_inline token_position end() const noexcept; + + friend class document; + friend class document_stream; + friend class object; + friend class array; + friend class value; + friend class raw_json_string; + friend class parser; + friend class value_iterator; + friend class field; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... 
args) noexcept; +}; // json_iterator + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public lsx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lsx::ondemand::json_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H +/* end file simdjson/generic/ondemand/json_iterator.h for lsx */ +/* including simdjson/generic/ondemand/json_type.h for lsx: #include "simdjson/generic/ondemand/json_type.h" */ +/* begin file simdjson/generic/ondemand/json_type.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace ondemand { + +/** + * The type of a JSON value. + */ +enum class json_type { + // Start at 1 to catch uninitialized / default values more easily + array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) + object, ///< A JSON object ( { "a": 1, "b": 2, ... } ) + number, ///< A JSON number ( 1 or -2.3 or 4.5e6 ...) + string, ///< A JSON string ( "a" or "hello world\n" ...) + boolean, ///< A JSON boolean (true or false) + null ///< A JSON null (null) +}; + +/** + * A type representing a JSON number. + * The design of the struct is deliberately straight-forward.
All + * functions return standard values with no error check. + */ +struct number { + + /** + * return the automatically determined type of + * the number: number_type::floating_point_number, + * number_type::signed_integer or number_type::unsigned_integer. + * + * enum class number_type { + * floating_point_number=1, /// a binary64 number + * signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + * unsigned_integer /// a positive integer larger or equal to 1<<63 + * }; + */ + simdjson_inline ondemand::number_type get_number_type() const noexcept; + /** + * return true if the automatically determined type of + * the number is number_type::unsigned_integer. + */ + simdjson_inline bool is_uint64() const noexcept; + /** + * return the value as a uint64_t, only valid if is_uint64() is true. + */ + simdjson_inline uint64_t get_uint64() const noexcept; + simdjson_inline operator uint64_t() const noexcept; + + /** + * return true if the automatically determined type of + * the number is number_type::signed_integer. + */ + simdjson_inline bool is_int64() const noexcept; + /** + * return the value as an int64_t, only valid if is_int64() is true. + */ + simdjson_inline int64_t get_int64() const noexcept; + simdjson_inline operator int64_t() const noexcept; + + + /** + * return true if the automatically determined type of + * the number is number_type::floating_point_number. + */ + simdjson_inline bool is_double() const noexcept; + /** + * return the value as a double, only valid if is_double() is true. + */ + simdjson_inline double get_double() const noexcept; + simdjson_inline operator double() const noexcept; + + /** + * Convert the number to a double. Though it always succeeds, the conversion + * may be lossy if the number cannot be represented exactly.
+ */ + simdjson_inline double as_double() const noexcept; + + +protected: + /** + * The next block of declarations is designed so that we can call the number parsing + * functions on a number type. They are protected and should never be used outside + * of the core simdjson library. + */ + friend class value_iterator; + template + friend error_code numberparsing::write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer); + template + friend error_code numberparsing::parse_number(const uint8_t *const src, W &writer); + /** Store a signed 64-bit value to the number. */ + simdjson_inline void append_s64(int64_t value) noexcept; + /** Store an unsigned 64-bit value to the number. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + /** Store a double value to the number. */ + simdjson_inline void append_double(double value) noexcept; + /** Specifies that the value is a double, but leaves it undefined. */ + simdjson_inline void skip_double() noexcept; + /** + * End of friend declarations. + */ + + /** + * Our attributes are a union type (size = 64 bits) + * followed by a type indicator. + */ + union { + double floating_point_number; + int64_t signed_integer; + uint64_t unsigned_integer; + } payload{0}; + number_type type{number_type::signed_integer}; +}; + +/** + * Write the JSON type to the output stream + * + * @param out The output stream. + * @param type The json_type. + */ +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept; + +#if SIMDJSON_EXCEPTIONS +/** + * Send JSON type to an output stream. + * + * @param out The output stream. + * @param type The json_type. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown).
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false); +#endif + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public lsx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lsx::ondemand::json_type &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H +/* end file simdjson/generic/ondemand/json_type.h for lsx */ +/* including simdjson/generic/ondemand/raw_json_string.h for lsx: #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace ondemand { + +/** + * A string escaped per JSON rules, terminated with quote ("). They are used to represent + * unescaped keys inside JSON documents. + * + * (In other words, a pointer to the beginning of a string, just after the start quote, inside a + * JSON file.) + * + * This class is deliberately simplistic and has little functionality. You can + * compare a raw_json_string instance with an unescaped C string, but + * that is nearly all you can do. + * + * The raw_json_string is unescaped. 
If you wish to write an unescaped version of it to your own + * buffer, you may do so using the parser.unescape(string, buff) method, using an ondemand::parser + * instance. Doing so requires you to have a sufficiently large buffer. + * + * The raw_json_string instances originate typically from field instance which in turn represent + * key-value pairs from object instances. From a field instance, you get the raw_json_string + * instance by calling key(). You can, if you want a more usable string_view instance, call + * the unescaped_key() method on the field instance. You may also create a raw_json_string from + * any other string value, with the value.get_raw_json_string() method. Again, you can get + * a more usable string_view instance by calling get_string(). + * + */ +class raw_json_string { +public: + /** + * Create a new invalid raw_json_string. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline raw_json_string() noexcept = default; + + /** + * Create a new invalid raw_json_string pointed at the given location in the JSON. + * + * The given location must be just *after* the beginning quote (") in the JSON file. + * + * It *must* be terminated by a ", and be a valid JSON string. + */ + simdjson_inline raw_json_string(const uint8_t * _buf) noexcept; + /** + * Get the raw pointer to the beginning of the string in the JSON (just after the "). + * + * It is possible for this function to return a null pointer if the instance + * has outlived its existence. + */ + simdjson_inline const char * raw() const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done) on target.size() characters, + * and if the raw_json_string instance has a quote character at byte index target.size(). + * We never read more than length + 1 bytes in the raw_json_string instance. 
+ * If length is smaller than target.size(), this will return false. + * + * The std::string_view instance may contain any characters. However, the caller + * is responsible for setting length so that length bytes may be read in the + * raw_json_string. + * + * Performance: the comparison may be done using memcmp which may be efficient + * for long strings. + */ + simdjson_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The std::string_view instance should not contain unescaped quote characters: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * Performance: the comparison is done byte-by-byte which might be inefficient for + * long strings. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... + * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The provided C string should not contain an unescaped quote character: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... 
+ * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(const char* target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(const char* target) const noexcept; + + /** + * Returns true if target is free from unescaped quote. If target is known at + * compile-time, we might expect the computation to happen at compile time with + * many compilers (not all!). + */ + static simdjson_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; + static simdjson_inline bool is_free_from_unescaped_quote(const char* target) noexcept; + +private: + + + /** + * This will set the inner pointer to zero, effectively making + * this instance unusable. + */ + simdjson_inline void consume() noexcept { buf = nullptr; } + + /** + * Checks whether the inner pointer is non-null and thus usable. + */ + simdjson_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result will be a valid UTF-8. + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + * @param allow_replacement Whether we allow replacement of invalid surrogate pairs. 
+ */ + simdjson_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter, bool allow_replacement) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result may not be a valid UTF-8. https://simonsapin.github.io/wtf-8/ + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + */ + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(json_iterator &iter) const noexcept; + const uint8_t * buf{}; + friend class object; + friend class field; + friend class parser; + friend struct simdjson_result; +}; + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; + +/** + * Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible + * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings. 
+ */ +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept; +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept; + + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public lsx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lsx::ondemand::raw_json_string &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private + + simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape(lsx::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(lsx::ondemand::json_iterator &iter) const noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H +/* end file simdjson/generic/ondemand/raw_json_string.h for lsx */ +/* including simdjson/generic/ondemand/parser.h for lsx: #include "simdjson/generic/ondemand/parser.h" */ +/* begin file simdjson/generic/ondemand/parser.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation 
skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace lsx { +namespace ondemand { + +/** + * The default batch size for document_stream instances for this On Demand kernel. + * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * in the future. + */ +static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; +/** + * Some adversary might try to set the batch size to 0 or 1, which might cause problems. + * We set a minimum of 32B since anything else is highly likely to be an error. In practice, + * most users will want a much larger batch size. + * + * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON + * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. + */ +static constexpr size_t MINIMAL_BATCH_SIZE = 32; + +/** + * A JSON fragment iterator. + * + * This holds the actual iterator as well as the buffer for writing strings. + */ +class parser { +public: + /** + * Create a JSON parser. + * + * The new parser will have zero capacity. + */ + inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; + + inline parser(parser &&other) noexcept = default; + simdjson_inline parser(const parser &other) = delete; + simdjson_inline parser &operator=(const parser &other) = delete; + simdjson_inline parser &operator=(parser &&other) noexcept = default; + + /** Deallocate the JSON parser. */ + inline ~parser() noexcept = default; + + /** + * Start iterating an on-demand JSON document. + * + * ondemand::parser parser; + * document doc = parser.iterate(json); + * + * It is expected that the content is a valid UTF-8 file, containing a valid JSON document. + * Otherwise the iterate method may return an error. In particular, the whole input should be + * valid: we do not attempt to tolerate incorrect content either before or after a JSON + * document. 
If there is a UTF-8 BOM, the parser skips it. + * + * ### IMPORTANT: Validate what you use + * + * Calling iterate on an invalid JSON document may not immediately trigger an error. The call to + * iterate does not parse and validate the whole document. + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call iterate() again or destroy the parser. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * @param len The length of the JSON. + * @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). + * + * @return The document, or an error: + * - INSUFFICIENT_PADDING if the input has fewer than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped. + * - UNCLOSED_STRING if there is an unclosed string in the document.
+ */ + simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_result iterate_allow_incomplete_json(padded_string_view json) & noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const uint8_t *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(padded_string &&json) & noexcept = delete; + + /** + * @private + * + * Start iterating an on-demand JSON document. 
+ * + * ondemand::parser parser; + * json_iterator doc = parser.iterate(json); + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call parse() again or destroy the parser. + * + * The ondemand::document instance holds the iterator. The document must remain in scope + * while you are accessing instances of ondemand::value, ondemand::object, ondemand::array. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * + * @return The iterator, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document. + */ + simdjson_warn_unused simdjson_result iterate_raw(padded_string_view json) & noexcept; + + + /** + * Parse a buffer containing many JSON documents. 
+ * + * auto json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"_padded; + * ondemand::parser parser; + * ondemand::document_stream docs = parser.iterate_many(json); + * for (auto & doc : docs) { + * std::cout << doc["foo"] << std::endl; + * } + * // Prints 1 2 3 + * + * No copy of the input buffer is made. + * + * The function is lazy: it may be that no more than one JSON document at a time is parsed. + * + * The caller is responsible for ensuring that the input string data remains unchanged and is + * not deleted during the loop. + * + * ### Format + * + * The buffer must contain a series of one or more JSON documents, concatenated into a single + * buffer, separated by ASCII whitespace. It effectively parses until it has a fully valid document, + * then starts parsing the next document at that point. (It does this with more parallelism and + * lookahead than you might think, though.) + * + * Documents that consist of an object or array may omit the whitespace between them, concatenating + * with no separator. Documents that consist of a single primitive (i.e. documents that are not + * arrays or objects) MUST be separated with ASCII whitespace. + * + * The characters inside a JSON document, and between JSON documents, must be valid Unicode (UTF-8). + * If there is a UTF-8 BOM, the parser skips it. + * + * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. + * Setting batch_size to excessively large or excessively small values may negatively impact + * performance. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings.
+ * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * + * ### Parser Capacity + * + * If the parser's current capacity is less than batch_size, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param buf The concatenated JSON to parse. + * @param len The length of the concatenated JSON. + * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet + * spot is cache-related: small enough to fit in cache, yet big enough to + * parse as many documents as possible in one tight loop. + * Defaults to DEFAULT_BATCH_SIZE (1MB), which has been a reasonable sweet spot in our tests. + * @param allow_comma_separated (defaults to false) This allows a mode where the documents are + * separated by commas instead of whitespace. It comes with a performance + * penalty because the entire document is indexed at once (and the document must be + * less than 4 GB), and there is no multithreading. In this mode, the batch_size parameter + * is effectively ignored, as it is set to at least the document size. + * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: + * - MEMALLOC if the parser does not have enough capacity and memory allocation fails + * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. + * - other json errors if parsing fails. You should not rely on these errors to always be the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
+ */ + inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + + /** @private We do not want to allow implicit conversion from C string to std::string. */ + simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; + + /** The capacity of this parser (the largest document it can process). */ + simdjson_inline size_t capacity() const noexcept; + /** The maximum capacity of this parser (the largest document it is allowed to process). */ + simdjson_inline size_t max_capacity() const noexcept; + simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; + /** + * The maximum depth of this parser (the most deeply nested objects and arrays it can process). + * This parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. 
+ */ + simdjson_inline size_t max_depth() const noexcept; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * The max_depth parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return The error, if there is one. + */ + simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept; + + #ifdef SIMDJSON_THREADS_ENABLED + /** + * The parser instance can use threads when they are available to speed up some + * operations. It is enabled by default. Changing this attribute will change the + * behavior of the parser for future operations. + */ + bool threaded{true}; + #endif + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result must be valid UTF-8. + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. 
+ * @param allow_replacement Whether we allow a replacement if the input string contains unmatched surrogate pairs. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement = false) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result may not be valid UTF-8. See https://simonsapin.github.io/wtf-8/ + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. 
+ */ + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; + +private: + /** @private [for benchmarking access] The implementation to use */ + std::unique_ptr implementation{}; + size_t _capacity{0}; + size_t _max_capacity; + size_t _max_depth{DEFAULT_MAX_DEPTH}; + std::unique_ptr string_buf{}; +#if SIMDJSON_DEVELOPMENT_CHECKS + std::unique_ptr start_positions{}; +#endif + + friend class json_iterator; + friend class document_stream; +}; + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public lsx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lsx::ondemand::parser &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H +/* end file simdjson/generic/ondemand/parser.h for lsx */ + +// All other declarations +/* including simdjson/generic/ondemand/array.h for lsx: #include "simdjson/generic/ondemand/array.h" */ +/* begin file simdjson/generic/ondemand/array.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace ondemand { + +/** + * A forward-only JSON array. + */ +class array { +public: + /** + * Create a new invalid array. 
+ * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline array() noexcept = default; + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewound' to its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an array is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the beginning of the array and checks whether the + * array is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the array is 'rewound' to its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result is_empty() & noexcept; + /** + * Reset the iterator so that we are pointing back at the + * beginning of the array. You should still consume values only once even if you + * can iterate through the array more than once. If you unescape a string + * within the array more than once, you have unsafe code.
Note that rewinding + * an array means that you may need to reparse it anew: it is not a free + * operation. + * + * @returns true if the array contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/0/foo/a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an array + * instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression.
We only support + * JSONPath queries that are trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Consumes the array and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + + /** + * Get the value at the given index. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; +protected: + /** + * Go to the end of the array, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + + /** + * Begin array iteration. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + */ + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + /** + * Begin array iteration from the root. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + * @error TAPE_ERROR if there is no closing ] at the end of the document.
+ */ + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + /** + * Begin array iteration. + * + * This version of the method should be called after the initial [ has been verified, and is + * intended for use by switch statements that check the type of a value. + * + * @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array. + */ + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + + /** + * Internal array creation: create an array from the given iterator. Call array::start() or array::started() instead of this. + * + * @param iter The iterator. Must either be at the start of the first element with iter.is_alive() + * == true, or past the [] with is_alive() == false if the array is empty. Will be *moved* + * into the resulting array. + */ + simdjson_inline array(const value_iterator &iter) noexcept; + + /** + * Iterator marking current position. + * + * iter.is_alive() == false indicates iteration is complete.
+ */ + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; + friend struct simdjson_result; + friend class array_iterator; +}; + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public lsx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lsx::ondemand::array &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + inline simdjson_result count_elements() & noexcept; + inline simdjson_result is_empty() & noexcept; + inline simdjson_result reset() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_H +/* end file simdjson/generic/ondemand/array.h for lsx */ +/* including simdjson/generic/ondemand/array_iterator.h for lsx: #include "simdjson/generic/ondemand/array_iterator.h" */ +/* begin file simdjson/generic/ondemand/array_iterator.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation 
skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace lsx { +namespace ondemand { + +/** + * A forward-only JSON array iterator. + * + * This is an input_iterator, meaning: + * - It is forward-only + * - * must be called exactly once per element. + * - ++ must be called exactly once in between each * (*, ++, *, ++, * ...) + */ +class array_iterator { +public: + /** Create a new, invalid array iterator. */ + simdjson_inline array_iterator() noexcept = default; + + // + // Iterator interface + // + + /** + * Get the current element. + * + * Part of the std::iterator interface. + */ + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + /** + * Check if we are at the end of the JSON array. + * + * Part of the std::iterator interface. + * + * @return true if there are no more elements in the JSON array. + */ + simdjson_inline bool operator==(const array_iterator &) const noexcept; + /** + * Check if there are more elements in the JSON array. + * + * Part of the std::iterator interface. + * + * @return true if there are more elements in the JSON array. + */ + simdjson_inline bool operator!=(const array_iterator &) const noexcept; + /** + * Move to the next element. + * + * Part of the std::iterator interface. 
+ */ + simdjson_inline array_iterator &operator++() noexcept; + +private: + value_iterator iter{}; + + simdjson_inline array_iterator(const value_iterator &iter) noexcept; + + friend class array; + friend class value; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public lsx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lsx::ondemand::array_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H +/* end file simdjson/generic/ondemand/array_iterator.h for lsx */ +/* including simdjson/generic/ondemand/document.h for lsx: #include "simdjson/generic/ondemand/document.h" */ +/* begin file simdjson/generic/ondemand/document.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace ondemand { + +/** + * A JSON document. It holds a json_iterator instance. 
+ * + * Used by tokens to get text, and string buffer location. + * + * You must keep the document around during iteration. + */ +class document { +public: + /** + * Create a new invalid document. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline document() noexcept = default; + simdjson_inline document(const document &other) noexcept = delete; // pass your documents by reference, not by copy + simdjson_inline document(document &&other) noexcept = default; + simdjson_inline document &operator=(const document &other) noexcept = delete; + simdjson_inline document &operator=(document &&other) noexcept = default; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() & noexcept; + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() & noexcept; + /** + * Cast this JSON value to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + /** + * Cast this JSON value (inside string) to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64() noexcept; + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. 
+ * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: Calling get_string() twice on the same document is an error. + * + * @param allow_replacement Whether to allow a replacement character for unmatched surrogate pairs. + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + /** + * Attempts to fill the provided std::string reference with the parsed value of the current string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. + * We recommend you avoid allocating an std::string unless you need to. + * + * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. + */ + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + /** + * Cast this JSON value to a string. + * + * The string is not guaranteed to be valid UTF-8. 
See https://simonsapin.github.io/wtf-8/ + * + * Important: Calling get_wobbly_string() twice on the same document is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_inline simdjson_result get_bool() noexcept; + /** + * Cast this JSON value to a value when the document is an object or an array. + * + * You must not have begun iterating through the object or array. When + * SIMDJSON_DEVELOPMENT_CHECKS is set to 1 (which is the case when building in Debug mode + * by default), and you have already begun iterating, + * you will get an OUT_OF_ORDER_ITERATION error. If you have begun iterating, you can use + * rewind() to reset the document to its initial state before calling this method. + * + * @returns A value if the document is a JSON array or object. + * @returns SCALAR_DOCUMENT_AS_VALUE error if the document is a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result get_value() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null. 
+ * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_inline simdjson_result get() & noexcept { + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + } + /** @overload template simdjson_result get() & noexcept */ + template simdjson_inline simdjson_result get() && noexcept { + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." 
+ " You may also add support for custom types, see our documentation."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value + * + * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_inline error_code get(T &out) & noexcept; + /** @overload template error_code get(T &out) & noexcept */ + template simdjson_inline error_code get(T &out) && noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.) + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() noexcept(false); + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() & noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() & noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns An unsigned 64-bit integer. 
+ * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); + /** + * Cast this JSON value to a value when the document is an object or an array. + * + * You must not have begun iterating through the object or array. When + * SIMDJSON_DEVELOPMENT_CHECKS is defined, and you have already begun iterating, + * you will get an OUT_OF_ORDER_ITERATION error. 
If you have begun iterating, you can use + * rewind() to reset the document to its initial state before calling this method. + * + * @returns A value if the document is a JSON array or object. + * @exception SCALAR_DOCUMENT_AS_VALUE error if the document is a scalar (see is_scalar() function). + */ + simdjson_inline operator value() noexcept(false); +#endif + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. 
+ * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) & noexcept; + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to + * a key a single time. Doing object["mykey"].to_string()and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
+ */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field was not there when they are not in order). + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. 
+ * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the document is a scalar (string, number, null, Boolean). + * Returns false when it is an array or object. + * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + + /** + * Checks whether the document is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; + + /** + * Checks whether the document is a negative number. 
+ * + * @returns true if the number is negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the document is an integer number. Note that + * this requires to partially parse the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * @returns true if the number is an integer. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808 and no larger than 18446744073709551615. + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808 and greater or equal to -9223372036854775808. + * get_number_type() is number_type::big_integer if we have an integer outside + * of those ranges (either larger than 18446744073709551615 or smaller than -9223372036854775808). + * Otherwise, get_number_type() has value number_type::floating_point_number + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number. 
An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. If this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view may be the padded buffer. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". 
+ * -1.2e-100 + * true + * false + * null + */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + /** + * Reset the iterator inside the document instance so we are pointing back at the + * beginning of the document, as if it had just been created. It invalidates all + * values, objects and arrays that you have created so far (including unescaped strings). + */ + inline void rewind() noexcept; + /** + * Returns debugging information. + */ + inline std::string to_debug_string() noexcept; + /** + * Some unrecoverable error conditions may render the document instance unusable. + * The is_alive() method returns true when the document is still suitable. + */ + inline bool is_alive() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() const noexcept; + + /** + * Returns true if this document has been fully parsed. + * If you have consumed the whole document and at_end() returns + * false, then there may be trailing content. + */ + inline bool at_end() const noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. 
+ * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/\\u00E9") == 123 + * doc.at_pointer((const char*)u8"/\u00E9") returns an error (NO_SUCH_FIELD) + * + * Note that at_pointer() automatically calls rewind between each call. Thus + * all values, objects and arrays that you have created so far (including unescaped strings) + * are invalidated. After calling at_pointer, you need to consume the result: string values + * should be stored in your own variables, arrays should be decoded and stored in your own array-like + * structures and so forth. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. 
We only support + * JSONPath queries that are trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_path(".\\u00E9") == 123 + * doc.at_path((const char*)u8".\u00E9") returns an error (NO_SUCH_FIELD) + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Consumes the document and returns a string_view instance corresponding to the + * document as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; +protected: + /** + * Consumes the document. 
+ */ + simdjson_inline error_code consume() noexcept; + + simdjson_inline document(ondemand::json_iterator &&iter) noexcept; + simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; + + simdjson_inline value_iterator resume_value_iterator() noexcept; + simdjson_inline value_iterator get_root_value_iterator() noexcept; + simdjson_inline simdjson_result start_or_resume_object() noexcept; + static simdjson_inline document start(ondemand::json_iterator &&iter) noexcept; + + // + // Fields + // + json_iterator iter{}; ///< Current position in the document + static constexpr depth_t DOCUMENT_DEPTH = 0; ///< document depth is always 0 + + friend class array_iterator; + friend class value; + friend class ondemand::parser; + friend class object; + friend class array; + friend class field; + friend class token; + friend class document_stream; + friend class document_reference; +}; + + +/** + * A document_reference is a thin wrapper that refers to an existing document instance. + */ +class document_reference { +public: + simdjson_inline document_reference() noexcept; + simdjson_inline document_reference(document &d) noexcept; + simdjson_inline document_reference(const document_reference &other) noexcept = default; + simdjson_inline document_reference& operator=(const document_reference &other) noexcept = default; + simdjson_inline void rewind() noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code 
get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + + simdjson_inline simdjson_result is_null() noexcept; + template simdjson_inline simdjson_result get() & noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + simdjson_inline operator document&() const noexcept; +#if SIMDJSON_EXCEPTIONS + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator array() & noexcept(false); + simdjson_inline operator object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + + simdjson_inline simdjson_result type() noexcept; + simdjson_inline 
simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + +private: + document *doc{nullptr}; +}; +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public lsx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lsx::ondemand::document &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + 
simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() & noexcept; + template simdjson_inline simdjson_result get() && noexcept; + + template simdjson_inline error_code get(T &out) & noexcept; + template simdjson_inline error_code get(T &out) && noexcept; +#if SIMDJSON_EXCEPTIONS + template ::value == false>::type> + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator lsx::ondemand::array() & noexcept(false); + simdjson_inline operator lsx::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator lsx::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator lsx::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline 
simdjson_result is_string() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool at_end() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + + +} // namespace simdjson + + + +namespace simdjson { + +template<> +struct simdjson_result : public lsx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lsx::ondemand::document_reference value, error_code error) noexcept; + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + 
simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; +#if SIMDJSON_EXCEPTIONS + template ::value == false>::type> + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator lsx::ondemand::array() & noexcept(false); + simdjson_inline operator lsx::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator lsx::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator lsx::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline 
simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H +/* end file simdjson/generic/ondemand/document.h for lsx */ +/* including simdjson/generic/ondemand/document_stream.h for lsx: #include "simdjson/generic/ondemand/document_stream.h" */ +/* begin file simdjson/generic/ondemand/document_stream.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#ifdef SIMDJSON_THREADS_ENABLED +#include +#include +#include +#endif + +namespace simdjson { +namespace lsx { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED +/** @private Custom worker class **/ +struct stage1_worker { + stage1_worker() noexcept = default; + stage1_worker(const stage1_worker&) = delete; + stage1_worker(stage1_worker&&) = delete; + stage1_worker operator=(const stage1_worker&) = delete; + ~stage1_worker(); + /** + * We only start the thread when it is needed, not at object construction, this may throw. 
+ * You should only call this once. + **/ + void start_thread(); + /** + * Start a stage 1 job. You should first call 'run', then 'finish'. + * You must call start_thread once before. + */ + void run(document_stream * ds, parser * stage1, size_t next_batch_start); + /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ + void finish(); + +private: + + /** + * Normally, we would never stop the thread. But we do in the destructor. + * This function is only safe assuming that you are not waiting for results. You + * should have called run, then finish, and be done. + **/ + void stop_thread(); + + std::thread thread{}; + /** These three variables define the work done by the thread. **/ + ondemand::parser * stage1_thread_parser{}; + size_t _next_batch_start{}; + document_stream * owner{}; + /** + * We have two state variables. This could be streamlined to one variable in the future but + * we use two for clarity. + */ + bool has_work{false}; + bool can_work{true}; + + /** + * We lock using a mutex. + */ + std::mutex locking_mutex{}; + std::condition_variable cond_var{}; + + friend class document_stream; +}; +#endif // SIMDJSON_THREADS_ENABLED + +/** + * A forward-only stream of documents. + * + * Produced by parser::iterate_many. + * + */ +class document_stream { +public: + /** + * Construct an uninitialized document_stream. + * + * ```c++ + * document_stream docs; + * auto error = parser.iterate_many(json).get(docs); + * ``` + */ + simdjson_inline document_stream() noexcept; + /** Move one document_stream to another. */ + simdjson_inline document_stream(document_stream &&other) noexcept = default; + /** Move one document_stream to another. */ + simdjson_inline document_stream &operator=(document_stream &&other) noexcept = default; + + simdjson_inline ~document_stream() noexcept; + + /** + * Returns the input size in bytes. 
+ */ + inline size_t size_in_bytes() const noexcept; + + /** + * After iterating through the stream, this method + * returns the number of bytes that were not parsed at the end + * of the stream. If truncated_bytes() differs from zero, + * then the input was truncated maybe because incomplete JSON + * documents were found at the end of the stream. You + * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). + * + * You should only call truncated_bytes() after streaming through all + * documents, like so: + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto & doc : stream) { + * // do something with doc + * } + * size_t truncated = stream.truncated_bytes(); + * + */ + inline size_t truncated_bytes() const noexcept; + + class iterator { + public: + using value_type = simdjson_result; + using reference = simdjson_result; + using pointer = void; + using difference_type = std::ptrdiff_t; + using iterator_category = std::input_iterator_tag; + + /** + * Default constructor. + */ + simdjson_inline iterator() noexcept; + /** + * Get the current document (or error). + */ + simdjson_inline reference operator*() noexcept; + /** + * Advance to the next document (prefix). + */ + inline iterator& operator++() noexcept; + /** + * Check if we're at the end yet. + * @param other the end iterator to compare to. + */ + simdjson_inline bool operator!=(const iterator &other) const noexcept; + /** + * @private + * + * Gives the current index in the input document in bytes. + * + * document_stream stream = parser.parse_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * auto doc = *i; + * size_t index = i.current_index(); + * } + * + * This function (current_index()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. 
+ */ + simdjson_inline size_t current_index() const noexcept; + + /** + * @private + * + * Gives a view of the current document at the current position. + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * std::string_view v = i.source(); + * } + * + * The returned string_view instance is simply a map to the (unparsed) + * source string: it may thus include white-space characters and all manner + * of padding. + * + * This function (source()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + * + */ + simdjson_inline std::string_view source() const noexcept; + + /** + * Returns error of the stream (if any). + */ + inline error_code error() const noexcept; + + private: + simdjson_inline iterator(document_stream *s, bool finished) noexcept; + /** The document_stream we're iterating through. */ + document_stream* stream; + /** Whether we're finished or not. */ + bool finished; + + friend class document; + friend class document_stream; + friend class json_iterator; + }; + + /** + * Start iterating the documents in the stream. + */ + simdjson_inline iterator begin() noexcept; + /** + * The end of the stream, for iterator comparison purposes. + */ + simdjson_inline iterator end() noexcept; + +private: + + document_stream &operator=(const document_stream &) = delete; // Disallow copying + document_stream(const document_stream &other) = delete; // Disallow copying + + /** + * Construct a document_stream. Does not allocate or parse anything until the iterator is + * used. 
+ * + * @param parser is a reference to the parser instance used to generate this document_stream + * @param buf is the raw byte buffer we need to process + * @param len is the length of the raw byte buffer in bytes + * @param batch_size is the size of the windows (must be strictly greater or equal to the largest JSON document) + */ + simdjson_inline document_stream( + ondemand::parser &parser, + const uint8_t *buf, + size_t len, + size_t batch_size, + bool allow_comma_separated + ) noexcept; + + /** + * Parse the first document in the buffer. Used by begin(), to handle allocation and + * initialization. + */ + inline void start() noexcept; + + /** + * Parse the next document found in the buffer previously given to document_stream. + * + * The content should be a valid JSON document encoded as UTF-8. If there is a + * UTF-8 BOM, the parser skips it. + * + * You do NOT need to pre-allocate a parser. This function takes care of + * pre-allocating a capacity defined by the batch_size defined when creating the + * document_stream object. + * + * The function returns simdjson::EMPTY if there is no more data to be parsed. + * + * The function returns simdjson::SUCCESS (as integer = 0) in case of success + * and indicates that the buffer has successfully been parsed to the end. + * Every document it contained has been parsed without error. + * + * The function returns an error code from simdjson/simdjson.h in case of failure + * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth; + * the simdjson::error_message function converts these error codes into a string). + * + * You can also check validity by calling parser.is_valid(). The same parser can + * and should be reused for the other documents in the buffer. + */ + inline void next() noexcept; + + /** Move the json_iterator of the document to the location of the next document in the stream. */ + inline void next_document() noexcept; + + /** Get the next document index. 
*/ + inline size_t next_batch_start() const noexcept; + + /** Pass the next batch through stage 1 with the given parser. */ + inline error_code run_stage1(ondemand::parser &p, size_t batch_start) noexcept; + + // Fields + ondemand::parser *parser; + const uint8_t *buf; + size_t len; + size_t batch_size; + bool allow_comma_separated; + /** + * We are going to use just one document instance. The document owns + * the json_iterator. It implies that we only ever pass a reference + * to the document to the users. + */ + document doc{}; + /** The error (or lack thereof) from the current document. */ + error_code error; + size_t batch_start{0}; + size_t doc_index{}; + + #ifdef SIMDJSON_THREADS_ENABLED + /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ + bool use_thread; + + inline void load_from_stage1_thread() noexcept; + + /** Start a thread to run stage 1 on the next batch. */ + inline void start_stage1_thread() noexcept; + + /** Wait for the stage 1 thread to finish and capture the results. */ + inline void finish_stage1_thread() noexcept; + + /** The error returned from the stage 1 thread. */ + error_code stage1_thread_error{UNINITIALIZED}; + /** The thread used to run stage 1 against the next batch in the background. */ + std::unique_ptr worker{new(std::nothrow) stage1_worker()}; + /** + * The parser used to run stage 1 in the background. Will be swapped + * with the regular parser when finished. 
+ */ + ondemand::parser stage1_thread_parser{}; + + friend struct stage1_worker; + #endif // SIMDJSON_THREADS_ENABLED + + friend class parser; + friend class document; + friend class json_iterator; + friend struct simdjson_result; + friend struct internal::simdjson_result_base; +}; // document_stream + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { +template<> +struct simdjson_result : public lsx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lsx::ondemand::document_stream &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H +/* end file simdjson/generic/ondemand/document_stream.h for lsx */ +/* including simdjson/generic/ondemand/field.h for lsx: #include "simdjson/generic/ondemand/field.h" */ +/* begin file simdjson/generic/ondemand/field.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace ondemand { + +/** + * A JSON field (key/value pair) in an object. + * + * Returned from object iteration. + * + * Extends from std::pair so you can use C++ algorithms that rely on pairs. 
+ */ +class field : public std::pair { +public: + /** + * Create a new invalid field. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline field() noexcept; + + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + /** + * Get the key as a raw_json_string. Can be used for direct comparison with + * an unescaped C string: e.g., key() == "test". + */ + simdjson_inline raw_json_string key() const noexcept; + /** + * Get the unprocessed key as a string_view. This includes the quotes and may include + * some spaces after the last quote. + */ + simdjson_inline std::string_view key_raw_json_token() const noexcept; + /** + * Get the key as a string_view. This does not include the quotes and + * the string is unprocessed key so it may contain escape characters + * (e.g., \uXXXX or \n). Use unescaped_key() to get the unescaped key. + */ + simdjson_inline std::string_view escaped_key() const noexcept; + /** + * Get the field value. 
+ */ + simdjson_inline ondemand::value &value() & noexcept; + /** + * @overload ondemand::value &ondemand::value() & noexcept + */ + simdjson_inline ondemand::value value() && noexcept; + +protected: + simdjson_inline field(raw_json_string key, ondemand::value &&value) noexcept; + static simdjson_inline simdjson_result start(value_iterator &parent_iter) noexcept; + static simdjson_inline simdjson_result start(const value_iterator &parent_iter, raw_json_string key) noexcept; + friend struct simdjson_result; + friend class object_iterator; +}; + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public lsx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lsx::ondemand::field &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result key() noexcept; + simdjson_inline simdjson_result key_raw_json_token() noexcept; + simdjson_inline simdjson_result escaped_key() noexcept; + simdjson_inline simdjson_result value() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_H +/* end file simdjson/generic/ondemand/field.h for lsx */ +/* including simdjson/generic/ondemand/object.h for lsx: #include "simdjson/generic/ondemand/object.h" */ +/* begin file simdjson/generic/ondemand/object.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* 
amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace ondemand { + +/** + * A forward-only JSON object field iterator. + */ +class object { +public: + /** + * Create a new invalid object. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline object() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. 
You should access the value corresponding to a + * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. + * + * If you expect to have keys with escape characters, please review our documentation. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field was not there when they are not in order). + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. 
The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. + * + * If you expect to have keys with escape characters, please review our documentation. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. 
+ * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an object + * instance: there is no rewind and no invalidation. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. 
+ * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Reset the iterator so that we are pointing back at the + * beginning of the object. You should still consume values only once even if you + * can iterate through the object more than once. If you unescape a string within + * the object more than once, you have unsafe code. Note that rewinding an object + * means that you may need to reparse it anew: it is not a free operation. + * + * @returns true if the object contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * This method scans the beginning of the object and checks whether the + * object is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + inline simdjson_result is_empty() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. 
+ * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Consumes the object and returns a string_view instance corresponding to the + * object as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + +protected: + /** + * Go to the end of the object, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + static simdjson_inline object resume(const value_iterator &iter) noexcept; + simdjson_inline object(const value_iterator &iter) noexcept; + + simdjson_warn_unused simdjson_inline error_code find_field_raw(const std::string_view key) noexcept; + + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public lsx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lsx::ondemand::object &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + simdjson_inline 
simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + + inline simdjson_result reset() noexcept; + inline simdjson_result is_empty() noexcept; + inline simdjson_result count_fields() & noexcept; + inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_H +/* end file simdjson/generic/ondemand/object.h for lsx */ +/* including simdjson/generic/ondemand/object_iterator.h for lsx: #include "simdjson/generic/ondemand/object_iterator.h" */ +/* begin file simdjson/generic/ondemand/object_iterator.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace ondemand { + +class object_iterator { +public: + /** + * Create a new invalid object_iterator. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline object_iterator() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. 
+ // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result operator*() noexcept; + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const object_iterator &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. + simdjson_inline bool operator!=(const object_iterator &) const noexcept; + // Checks for ']' and ',' + simdjson_inline object_iterator &operator++() noexcept; + +private: + /** + * The underlying JSON iterator. + * + * PERF NOTE: expected to be elided in favor of the parent document: this is set when the object + * is first used, and never changes afterwards. + */ + value_iterator iter{}; + + simdjson_inline object_iterator(const value_iterator &iter) noexcept; + friend struct simdjson_result; + friend class object; +}; + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public lsx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lsx::ondemand::object_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. 
+ simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + // Checks for ']' and ',' + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H +/* end file simdjson/generic/ondemand/object_iterator.h for lsx */ +/* including simdjson/generic/ondemand/serialization.h for lsx: #include "simdjson/generic/ondemand/serialization.h" */ +/* begin file simdjson/generic/ondemand/serialization.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Create a string-view instance out of a document instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(lsx::ondemand::document& x) noexcept; +/** + * Create a string-view instance out of a value instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. The value must + * not have been accessed previously. It does not + * validate the content. + */ +inline simdjson_result to_json_string(lsx::ondemand::value& x) noexcept; +/** + * Create a string-view instance out of an object instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(lsx::ondemand::object& x) noexcept; +/** + * Create a string-view instance out of an array instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. 
It does not + * validate the content. + */ +inline simdjson_result to_json_string(lsx::ondemand::array& x) noexcept; +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +} // namespace simdjson + +/** + * We want to support argument-dependent lookup (ADL). + * Hence we should define operator<< in the namespace + * where the argument (here value, object, etc.) resides. + * Credit: @madhur4127 + * See https://github.com/simdjson/simdjson/issues/1768 + */ +namespace simdjson { namespace lsx { namespace ondemand { + +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The element. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::value x); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::array value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The document. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. 
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::document& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::document_reference& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The object. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::object value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +}}} // namespace simdjson::lsx::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H +/* end file simdjson/generic/ondemand/serialization.h for lsx */ + +// Inline definitions +/* including simdjson/generic/ondemand/array-inl.h for lsx: #include "simdjson/generic/ondemand/array-inl.h" */ +/* begin file simdjson/generic/ondemand/array-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace ondemand { + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the array is first found and the iterator is just past the `[`. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the `,` before the next value (or `]`). In this state, +// depth == iter->depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter->depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the array iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an +// array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter->depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter->depth == depth, and at_start == false. +// +// ## Terminal State +// +// The terminal state has iter->depth < depth. at_start is always false. 
+// +// - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this +// by decrementing depth. In this state, iter->depth < depth, at_start == false, and +// error == SUCCESS. +// + +simdjson_inline array::array(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { + // We don't need to know if the array is empty to start iteration, but we do want to know if there + // is an error--thus `simdjson_unused`. + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_root_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { + bool has_value; + SIMDJSON_TRY(iter.started_array().get(has_value)); + return array(iter); +} + +simdjson_inline simdjson_result array::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return array_iterator(iter); +} +simdjson_inline simdjson_result array::end() noexcept { + return array_iterator(iter); +} +simdjson_inline error_code array::consume() noexcept { + auto error = iter.json_iter().skip_child(iter.depth()-1); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result array::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. 
+ const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline simdjson_result array::count_elements() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the array after counting the number of elements. + iter.reset_array(); + return count; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline simdjson_result array::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_array().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +inline simdjson_result array::reset() & noexcept { + return iter.reset_array(); +} + +inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + // - means "the append position" or "the element after the end of the array" + // We don't support this, because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. 
If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + // Get the child + auto child = at(array_index); + // If there is an error, it ends here + if(child.error()) { + return child; + } + + // If there is a /, we're not done yet, call recursively. + if (i < json_pointer.length()) { + child = child.at_pointer(json_pointer.substr(i)); + } + return child; +} + +inline std::string json_path_to_pointer_conversion(std::string_view json_path) { + if (json_path.empty() || (json_path.front() != '.' && + json_path.front() != '[')) { + return "-1"; // This is just a sentinel value, the caller should check for this and return an error. + } + + std::string result; + // Reserve space to reduce allocations, adjusting for potential increases due + // to escaping. + result.reserve(json_path.size() * 2); + + size_t i = 0; + + while (i < json_path.length()) { + if (json_path[i] == '.') { + result += '/'; + } else if (json_path[i] == '[') { + result += '/'; + ++i; // Move past the '[' + while (i < json_path.length() && json_path[i] != ']') { + if (json_path[i] == '~') { + result += "~0"; + } else if (json_path[i] == '/') { + result += "~1"; + } else { + result += json_path[i]; + } + ++i; + } + if (i == json_path.length() || json_path[i] != ']') { + return "-1"; // Using sentinel value that will be handled as an error by the caller. 
+ } + } else { + if (json_path[i] == '~') { + result += "~0"; + } else if (json_path[i] == '/') { + result += "~1"; + } else { + result += json_path[i]; + } + } + ++i; + } + + return result; +} + +inline simdjson_result array::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { return INVALID_JSON_POINTER; } + return at_pointer(json_pointer); +} + +simdjson_inline simdjson_result array::at(size_t index) noexcept { + size_t i = 0; + for (auto value : *this) { + if (i == index) { return value; } + i++; + } + return INDEX_OUT_OF_BOUNDS; +} + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + lsx::ondemand::array &&value +) noexcept + : implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept + : implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { + if (error()) { return error(); } + return first.is_empty(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) 
noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H +/* end file simdjson/generic/ondemand/array-inl.h for lsx */ +/* including simdjson/generic/ondemand/array_iterator-inl.h for lsx: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/array_iterator-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace ondemand { + +simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result array_iterator::operator*() noexcept { + if (iter.error()) { iter.abandon(); return iter.error(); } + return value(iter.child()); +} +simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { + return iter.is_open(); +} +simdjson_inline array_iterator &array_iterator::operator++() noexcept { + error_code error; + // PERF NOTE this is a safety rail ... 
users should exit loops as soon as they receive an error, so we'll never get here. + // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. + if (( error = iter.error() )) { return *this; } + if (( error = iter.skip_child() )) { return *this; } + if (( error = iter.has_next_element().error() )) { return *this; } + return *this; +} + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + lsx::ondemand::array_iterator &&value +) noexcept + : lsx::implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : lsx::implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++(first); + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/array_iterator-inl.h for lsx */ +/* including simdjson/generic/ondemand/document-inl.h for lsx: #include "simdjson/generic/ondemand/document-inl.h" */ +/* begin file simdjson/generic/ondemand/document-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H + +/* amalgamation skipped (editor-only): #ifndef 
SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace ondemand { + +simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept + : iter{std::forward(_iter)} +{ + logger::log_start_value(iter, "document"); +} + +simdjson_inline document document::start(json_iterator &&iter) noexcept { + return document(std::forward(iter)); +} + +inline void document::rewind() noexcept { + iter.rewind(); +} + +inline std::string document::to_debug_string() noexcept { + return iter.to_string(); +} + +inline simdjson_result document::current_location() const noexcept { + return iter.current_location(); +} + +inline 
int32_t document::current_depth() const noexcept { + return iter.depth(); +} + +inline bool document::at_end() const noexcept { + return iter.at_end(); +} + + +inline bool document::is_alive() noexcept { + return iter.is_alive(); +} +simdjson_inline value_iterator document::resume_value_iterator() noexcept { + return value_iterator(&iter, 1, iter.root_position()); +} +simdjson_inline value_iterator document::get_root_value_iterator() noexcept { + return resume_value_iterator(); +} +simdjson_inline simdjson_result document::start_or_resume_object() noexcept { + if (iter.at_root()) { + return get_object(); + } else { + return object::resume(resume_value_iterator()); + } +} +simdjson_inline simdjson_result document::get_value() noexcept { + // Make sure we start any arrays or objects before returning, so that start_root_() + // gets called. + + // It is the convention throughout the code that the macro `SIMDJSON_DEVELOPMENT_CHECKS` determines whether + // we check for OUT_OF_ORDER_ITERATION. Proper on::demand code should never trigger this error. +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.at_root()) { return OUT_OF_ORDER_ITERATION; } +#endif + // assert_at_root() serves two purposes: in Debug mode, whether or not + // SIMDJSON_DEVELOPMENT_CHECKS is set or not, it checks that we are at the root of + // the document (this will typically be redundant). In release mode, it generates + // SIMDJSON_ASSUME statements to allow the compiler to make assumptions. + iter.assert_at_root(); + switch (*iter.peek()) { + case '[': { + // The following lines check that the document ends with ]. + auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_array(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + case '{': { + // The following lines would check that the document ends with }. 
+ auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_object(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + default: + // Unfortunately, scalar documents are a special case in simdjson and they cannot + // be safely converted to value instances. + return SCALAR_DOCUMENT_AS_VALUE; + } +} +simdjson_inline simdjson_result document::get_array() & noexcept { + auto value = get_root_value_iterator(); + return array::start_root(value); +} +simdjson_inline simdjson_result document::get_object() & noexcept { + auto value = get_root_value_iterator(); + return object::start_root(value); +} + +/** + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. We want to disallow trailing + * content. + * Thus, in several implementations below, we pass a 'true' parameter value to + * the method called on the iterator returned by get_root_value_iterator() (e.g., get_root_double(true)): this indicates that we disallow trailing content. 
+ */ + +simdjson_inline simdjson_result document::get_uint64() noexcept { + return get_root_value_iterator().get_root_uint64(true); +} +simdjson_inline simdjson_result document::get_uint64_in_string() noexcept { + return get_root_value_iterator().get_root_uint64_in_string(true); +} +simdjson_inline simdjson_result document::get_int64() noexcept { + return get_root_value_iterator().get_root_int64(true); +} +simdjson_inline simdjson_result document::get_int64_in_string() noexcept { + return get_root_value_iterator().get_root_int64_in_string(true); +} +simdjson_inline simdjson_result document::get_double() noexcept { + return get_root_value_iterator().get_root_double(true); +} +simdjson_inline simdjson_result document::get_double_in_string() noexcept { + return get_root_value_iterator().get_root_double_in_string(true); +} +simdjson_inline simdjson_result document::get_string(bool allow_replacement) noexcept { + return get_root_value_iterator().get_root_string(true, allow_replacement); +} +template +simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { + return get_root_value_iterator().get_root_string(receiver, true, allow_replacement); +} +simdjson_inline simdjson_result document::get_wobbly_string() noexcept { + return get_root_value_iterator().get_root_wobbly_string(true); +} +simdjson_inline simdjson_result document::get_raw_json_string() noexcept { + return get_root_value_iterator().get_root_raw_json_string(true); +} +simdjson_inline simdjson_result document::get_bool() noexcept { + return get_root_value_iterator().get_root_bool(true); +} +simdjson_inline simdjson_result document::is_null() noexcept { + return get_root_value_iterator().is_root_null(true); +} + +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document::get() & 
noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } + +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } + +template simdjson_inline error_code document::get(T &out) & noexcept { + return get().get(out); +} +template simdjson_inline error_code document::get(T &out) && noexcept { + return std::forward(*this).get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline document::operator T() noexcept(false) { return get(); } +simdjson_inline document::operator array() & noexcept(false) { return get_array(); } +simdjson_inline document::operator object() & noexcept(false) { return get_object(); } +simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } 
+simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document::operator double() noexcept(false) { return get_double(); } +simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document::operator value() noexcept(false) { return get_value(); } + +#endif +simdjson_inline simdjson_result document::count_elements() & noexcept { + auto a = get_array(); + simdjson_result answer = a.count_elements(); + /* If there was an array, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::count_fields() & noexcept { + auto a = get_object(); + simdjson_result answer = a.count_fields(); + /* If there was an object, we are now left pointing at its first element. 
*/ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::at(size_t index) & noexcept { + auto a = get_array(); + return a.at(index); +} +simdjson_inline simdjson_result document::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result document::end() & noexcept { + return {}; +} + +simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field(const char *key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline error_code document::consume() noexcept { + auto error = iter.skip_child(0); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result document::raw_json() noexcept { + auto _iter = get_root_value_iterator(); + const uint8_t * starting_point{_iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. 
+ const uint8_t * final_point{iter.unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result document::type() noexcept { + return get_root_value_iterator().type(); +} + +simdjson_inline simdjson_result document::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline simdjson_result document::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + +simdjson_inline bool document::is_negative() noexcept { + return get_root_value_iterator().is_root_negative(); +} + +simdjson_inline simdjson_result document::is_integer() noexcept { + return get_root_value_iterator().is_root_integer(true); +} + +simdjson_inline simdjson_result document::get_number_type() noexcept { + return get_root_value_iterator().get_root_number_type(true); +} + +simdjson_inline simdjson_result document::get_number() noexcept { + return get_root_value_iterator().get_root_number(true); +} + + +simdjson_inline simdjson_result document::raw_json_token() noexcept { + auto _iter = get_root_value_iterator(); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); +} + +simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_pointer.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result 
document::at_path(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_path.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + lsx::ondemand::document &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base( + error + ) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return 
first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return 
first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first); +} +template<> simdjson_inline error_code simdjson_result::get(lsx::ondemand::document &out) & noexcept = delete; +template<> simdjson_inline error_code 
simdjson_result::get(lsx::ondemand::document &out) && noexcept { + if (error()) { return error(); } + out = std::forward(first); + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} + +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} + +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} + +simdjson_inline bool simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} + +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} + +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} + +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} + + +#if SIMDJSON_EXCEPTIONS +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lsx::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lsx::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + 
if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lsx::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lsx::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline bool simdjson_result::at_end() const noexcept { + if (error()) { return error(); } + return first.at_end(); +} + + +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} + +} // namespace simdjson + + +namespace simdjson { +namespace lsx { +namespace ondemand { + +simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} +simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} +simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } +simdjson_inline 
simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } +simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } +/** + * The document_reference instances are used primarily/solely for streams of JSON + * documents. + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. + * + * However, for streams of JSON documents, we want to be able to start from + * "321" "321" "321" + * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() + * successfully each time. + * + * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: + * this indicates that we allow trailing content. + */ +simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } +simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } +simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } +template +simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) 
noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } +simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } +simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } +simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } +simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } +simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_value(); } +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline document_reference::operator T() noexcept(false) { return get(); } +simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } +simdjson_inline 
document_reference::operator object() & noexcept(false) { return object(*doc); } +simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } +simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } +#endif +simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } +simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } +simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } +simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } +simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } +simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result 
document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } +simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } +simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } +simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } +simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } +simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } +simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } +simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } +simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } +simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } +simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} +simdjson_inline document_reference::operator document&() const noexcept { return *doc; } + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + + + +namespace simdjson { +simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::document_reference value, error_code error) + noexcept : 
implementation_simdjson_result_base(std::forward(value), error) {} + + +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return 
first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if 
(error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lsx::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lsx::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline 
simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lsx::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lsx::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H +/* end file simdjson/generic/ondemand/document-inl.h for lsx */ +/* including simdjson/generic/ondemand/document_stream-inl.h for lsx: #include "simdjson/generic/ondemand/document_stream-inl.h" */ +/* begin file simdjson/generic/ondemand/document_stream-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ +/* 
amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace lsx { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. + std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); +} + +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); +} + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). 
+ if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); +} + + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } +} + +inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); +} + +#endif // SIMDJSON_THREADS_ENABLED + +simdjson_inline document_stream::document_stream( + ondemand::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size, + bool _allow_comma_separated +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? 
MINIMAL_BATCH_SIZE : _batch_size}, + allow_comma_separated{_allow_comma_separated}, + error{SUCCESS} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change + #endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif +} + +simdjson_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + allow_comma_separated{false}, + error{UNINITIALIZED} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) + #endif +{ +} + +simdjson_inline document_stream::~document_stream() noexcept +{ + #ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); + #endif +} + +inline size_t document_stream::size_in_bytes() const noexcept { + return len; +} + +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +} + +simdjson_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { +} + +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { +} + +simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { + //if(stream->error) { return stream->error; } + return simdjson_result(stream->doc, stream->error); +} + +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) 
+ // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. + // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->error() is guarded against error conditions + // (it will immediately return if stream->error casts to false). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; +} + +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; +} + +simdjson_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. 
+ return iterator(this, error == EMPTY); +} + +simdjson_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); +} + +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->allocate(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } + doc_index = batch_start; + doc = document(json_iterator(&buf[batch_start], parser)); + doc.iter._streaming = true; + + #ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread on next batch if needed + error = stage1_thread_parser.allocate(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } + #endif // SIMDJSON_THREADS_ENABLED +} + +inline void document_stream::next() noexcept { + // We always enter at once once in an error condition. + if (error) { return; } + next_document(); + if (error) { return; } + auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); + doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; + + // Check if at end of structural indexes (i.e. 
at end of batch) + if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { + error = EMPTY; + // Load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + #ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } + #else + error = run_stage1(*parser, batch_start); + #endif + /** + * Whenever we move to another window, we need to update all pointers to make + * it appear as if the input buffer started at the beginning of the window. + * + * Take this input: + * + * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] + * + * Say you process the following window... + * + * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' + * + * When you do so, the json_iterator has a pointer at the beginning of the memory region + * (pointing at the beginning of '{"z"...'. + * + * When you move to the window that starts at... + * + * '[7, 10, 9] [15, 11, 12, 13] ... + * + * then it is not sufficient to just run stage 1. You also need to re-anchor the + * json_iterator so that it believes we are starting at '[7, 10, 9]...'. + * + * Under the DOM front-end, this gets done automatically because the parser owns + * the pointer the data, and when you call stage1 and then stage2 on the same + * parser, then stage2 will run on the pointer acquired by stage1. + * + * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that + * we used. But json_iterator has no callback when stage1 is called on the parser. + * In fact, I think that the parser is unaware of json_iterator. + * + * + * So we need to re-anchor the json_iterator after each call to stage 1 so that + * all of the pointers are in sync. + */ + doc.iter = json_iterator(&buf[batch_start], parser); + doc.iter._streaming = true; + /** + * End of resync. 
+ */ + + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. + doc_index = batch_start; + } + } +} + +inline void document_stream::next_document() noexcept { + // Go to next place where depth=0 (document depth) + error = doc.iter.skip_child(0); + if (error) { return; } + // Always set depth=1 at the start of document + doc.iter._depth = 1; + // consume comma if comma separated is allowed + if (allow_comma_separated) { doc.iter.consume_character(','); } + // Resets the string buffer at the beginning, thus invalidating the strings. + doc.iter._string_buf_loc = parser->string_buf.get(); + doc.iter._root = doc.iter.position(); +} + +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; +} + +inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { + // This code only updates the structural index in the parser, it does not update any json_iterator + // instance. 
+ size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); + } +} + +simdjson_inline size_t document_stream::iterator::current_index() const noexcept { + return stream->doc_index; +} + +simdjson_inline std::string_view document_stream::iterator::source() const noexcept { + auto depth = stream->doc.iter.depth(); + auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); + + // If at root, process the first token to determine if scalar value + if (stream->doc.iter.at_root()) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': // Depth=1 already at start of document + break; + case '}': case ']': + depth--; + break; + default: // Scalar value document + // TODO: We could remove trailing whitespaces + // This returns a string spanning from start of value to the beginning of the next document (excluded) + { + auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; + // normally the length would be next_index - current_index() - 1, except for the last document + size_t svlen = next_index - current_index(); + const char *start = reinterpret_cast(stream->buf) + current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { + svlen--; + } + return std::string_view(start, svlen); + } + } + cur_struct_index++; + } + + while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': + depth++; + break; + case '}': case ']': + depth--; + break; + } + if (depth == 0) { break; } + 
cur_struct_index++; + } + + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; +} + +inline error_code document_stream::iterator::error() const noexcept { + return stream->error; +} + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(stage1_thread_parser,*parser); + error = stage1_thread_error; + if (error) { return; } + + // If there's anything left, start the stage 1 thread! + if (next_batch_start() < len) { + start_stage1_thread(); + } +} + +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. 
+ this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); + + worker->run(this, & this->stage1_thread_parser, _next_batch_start); +} + +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result::simdjson_result( + lsx::ondemand::document_stream &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} + +} + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H +/* end file simdjson/generic/ondemand/document_stream-inl.h for lsx */ +/* including simdjson/generic/ondemand/field-inl.h for lsx: #include "simdjson/generic/ondemand/field-inl.h" */ +/* begin file simdjson/generic/ondemand/field-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace ondemand { + +// clang 6 does not think the default constructor can be noexcept, so we make it explicit +simdjson_inline field::field() noexcept : std::pair() {} + +simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept + : std::pair(key, 
std::forward(value)) +{ +} + +simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { + raw_json_string key; + SIMDJSON_TRY( parent_iter.field_key().get(key) ); + SIMDJSON_TRY( parent_iter.field_value() ); + return field::start(parent_iter, key); +} + +simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { + return field(key, parent_iter.child()); +} + +simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); + first.consume(); + return answer; +} + +simdjson_inline raw_json_string field::key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return first; +} + + +simdjson_inline std::string_view field::key_raw_json_token() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return std::string_view(reinterpret_cast(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1); +} + +simdjson_inline std::string_view field::escaped_key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. 
+ auto end_quote = second.iter._json_iter->token.peek(-1); + while(*end_quote != '"') end_quote--; + return std::string_view(reinterpret_cast(first.buf), end_quote - first.buf); +} + +simdjson_inline value &field::value() & noexcept { + return second; +} + +simdjson_inline value field::value() && noexcept { + return std::forward(*this).second; +} + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + lsx::ondemand::field &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::key() noexcept { + if (error()) { return error(); } + return first.key(); +} + +simdjson_inline simdjson_result simdjson_result::key_raw_json_token() noexcept { + if (error()) { return error(); } + return first.key_raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::escaped_key() noexcept { + if (error()) { return error(); } + return first.escaped_key(); +} + +simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(allow_replacement); +} + +simdjson_inline simdjson_result simdjson_result::value() noexcept { + if (error()) { return error(); } + return std::move(first.value()); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +/* end file simdjson/generic/ondemand/field-inl.h for lsx */ +/* including simdjson/generic/ondemand/json_iterator-inl.h for lsx: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/json_iterator-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* 
amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace ondemand { + +simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept + : token(std::forward(other.token)), + parser{other.parser}, + _string_buf_loc{other._string_buf_loc}, + error{other.error}, + _depth{other._depth}, + _root{other._root}, + _streaming{other._streaming} +{ + other.parser = nullptr; +} +simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { + token = other.token; + parser = other.parser; + _string_buf_loc = other._string_buf_loc; + error = other.error; + _depth = other._depth; + _root = other._root; + _streaming = other._streaming; + other.parser = nullptr; + return *this; +} + +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{false} + +{ + logger::log_headers(); 
+#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} + +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{streaming} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + +inline void json_iterator::rewind() noexcept { + token.set_position( root_position() ); + logger::log_headers(); // We start again + _string_buf_loc = parser->string_buf.get(); + _depth = 1; +} + +inline bool json_iterator::balanced() const noexcept { + token_iterator ti(token); + int32_t count{0}; + ti.set_position( root_position() ); + while(ti.peek() <= peek_last()) { + switch (*ti.return_current_and_advance()) + { + case '[': case '{': + count++; + break; + case ']': case '}': + count--; + break; + default: + break; + } + } + return count == 0; +} + + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and parent_depth, which is a desired effect. The warning does not show up if the +// skip_child() function is not marked inline). 
+SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { + if (depth() <= parent_depth) { return SUCCESS; } + switch (*return_current_and_advance()) { + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + + // For the first open array/object in a value, we've already incremented depth, so keep it the same + // We never stop at colon, but if we did, it wouldn't affect depth + case '[': case '{': case ':': + logger::log_start_value(*this, "skip"); + break; + // If there is a comma, we have just finished a value in an array/object, and need to get back in + case ',': + logger::log_value(*this, "skip"); + break; + // ] or } means we just finished a value and need to jump out of the array/object + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } +#if SIMDJSON_CHECK_EOF + // If there are no more tokens, the parent is incomplete. + if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + break; + case '"': + if(*peek() == ':') { + // We are at a key!!! + // This might happen if you just started an object and you skip it immediately. + // Performance note: it would be nice to get rid of this check as it is somewhat + // expensive. + // https://github.com/simdjson/simdjson/issues/1742 + logger::log_value(*this, "key"); + return_current_and_advance(); // eat up the ':' + break; // important!!! 
+ } + simdjson_fallthrough; + // Anything else must be a scalar value + default: + // For the first scalar, we will have incremented depth already, so we decrement it here. + logger::log_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + } + + // Now that we've considered the first value, we only increment/decrement for arrays/objects + while (position() < end_position()) { + switch (*return_current_and_advance()) { + case '[': case '{': + logger::log_start_value(*this, "skip"); + _depth++; + break; + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + default: + logger::log_value(*this, "skip", ""); + break; + } + } + + return report_error(TAPE_ERROR, "not enough close braces"); +} + +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool json_iterator::at_root() const noexcept { + return position() == root_position(); +} + +simdjson_inline bool json_iterator::is_single_token() const noexcept { + return parser->implementation->n_structural_indexes == 1; +} + +simdjson_inline bool json_iterator::streaming() const noexcept { + return _streaming; +} + +simdjson_inline token_position json_iterator::root_position() const noexcept { + return _root; +} + +simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +} + +simdjson_inline void json_iterator::assert_at_root() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + // Under Visual Studio, the next 
SIMDJSON_ASSUME fails with: the argument + // has side effects that will be discarded. + SIMDJSON_ASSUME( token.position() == _root ); +#endif +} + +simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { + assert_valid_position(token._position + required_tokens - 1); +} + +simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); + SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#endif +} + +simdjson_inline bool json_iterator::at_end() const noexcept { + return position() == end_position(); +} +simdjson_inline token_position json_iterator::end_position() const noexcept { + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + return &parser->implementation->structural_indexes[n_structural_indexes]; +} + +inline std::string json_iterator::to_string() const noexcept { + if( !is_alive() ) { return "dead json_iterator instance"; } + const char * current_structural = reinterpret_cast(token.peek()); + return std::string("json_iterator [ depth : ") + std::to_string(_depth) + + std::string(", structural : '") + std::string(current_structural,1) + + std::string("', offset : ") + std::to_string(token.current_offset()) + + std::string("', error : ") + error_message(error) + + std::string(" ]"); +} + +inline simdjson_result json_iterator::current_location() const noexcept { + if (!is_alive()) { // Unrecoverable error + if (!at_root()) { + return reinterpret_cast(token.peek(-1)); + } else { + return reinterpret_cast(token.peek()); + } + } + if (at_end()) { + return OUT_OF_BOUNDS; + } + return reinterpret_cast(token.peek()); +} + +simdjson_inline bool json_iterator::is_alive() const noexcept { + return parser; +} + +simdjson_inline void json_iterator::abandon() noexcept { + parser 
= nullptr; + _depth = 0; +} + +simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif // SIMDJSON_CHECK_EOF + return token.return_current_and_advance(); +} + +simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { + // deliberately done without safety guard: + return token.peek(); +} + +simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // SIMDJSON_CHECK_EOF + return token.peek(delta); +} + +simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // #if SIMDJSON_CHECK_EOF + return token.peek_length(delta); +} + +simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { + // todo: currently we require end-of-string buffering, but the following + // assert_valid_position should be turned on if/when we lift that condition. + // assert_valid_position(position); + // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. + return token.peek(position); +} + +simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_length(position); +} +simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_root_length(position); +} + +simdjson_inline token_position json_iterator::last_position() const noexcept { + // The following line fails under some compilers... 
+ // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); + // since it has side-effects. + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + SIMDJSON_ASSUME(n_structural_indexes > 0); + return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +} +simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { + return token.peek(last_position()); +} + +simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { + SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); + SIMDJSON_ASSUME(_depth == parent_depth + 1); + _depth = parent_depth; +} + +simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); + _depth = child_depth; +} + +simdjson_inline depth_t json_iterator::depth() const noexcept { + return _depth; +} + +simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { + return _string_buf_loc; +} + +simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); + logger::log_error(*this, message); + error = _error; + return error; +} + +simdjson_inline token_position json_iterator::position() const noexcept { + return token.position(); +} + +simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { + return parser->unescape(in, _string_buf_loc, allow_replacement); +} + +simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { + return parser->unescape_wobbly(in, _string_buf_loc); +} + +simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + 
SIMDJSON_ASSUME(_depth == child_depth - 1); +#if SIMDJSON_DEVELOPMENT_CHECKS +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); + SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); +#endif +#endif + token.set_position(position); + _depth = child_depth; +} + +simdjson_inline error_code json_iterator::consume_character(char c) noexcept { + if (*peek() == c) { + return_current_and_advance(); + return SUCCESS; + } + return TAPE_ERROR; +} + +#if SIMDJSON_DEVELOPMENT_CHECKS + +simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; +} + +simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } +} + +#endif + + +simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); + logger::log_error(*this, message); + return _error; +} + + +simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { + // This function is not expected to be called in performance-sensitive settings. + // Let us guard against silly cases: + if((N < max_len) || (N == 0)) { return false; } + // Copy to the buffer. + std::memcpy(tmpbuf, json, max_len); + if(N > max_len) { // We pad whatever remains with ' '. 
+ std::memset(tmpbuf + max_len, ' ', N - max_len); + } + return true; +} + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/json_iterator-inl.h for lsx */ +/* including simdjson/generic/ondemand/json_type-inl.h for lsx: #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* begin file simdjson/generic/ondemand/json_type-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace ondemand { + +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { + switch (type) { + case json_type::array: out << "array"; break; + case json_type::object: out << "object"; break; + case json_type::number: out << "number"; break; + case json_type::string: out << "string"; break; + case json_type::boolean: out << "boolean"; break; + case json_type::null: out << "null"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, 
simdjson_result &type) noexcept(false) { + return out << type.value(); +} +#endif + + + +simdjson_inline number_type number::get_number_type() const noexcept { + return type; +} + +simdjson_inline bool number::is_uint64() const noexcept { + return get_number_type() == number_type::unsigned_integer; +} + +simdjson_inline uint64_t number::get_uint64() const noexcept { + return payload.unsigned_integer; +} + +simdjson_inline number::operator uint64_t() const noexcept { + return get_uint64(); +} + +simdjson_inline bool number::is_int64() const noexcept { + return get_number_type() == number_type::signed_integer; +} + +simdjson_inline int64_t number::get_int64() const noexcept { + return payload.signed_integer; +} + +simdjson_inline number::operator int64_t() const noexcept { + return get_int64(); +} + +simdjson_inline bool number::is_double() const noexcept { + return get_number_type() == number_type::floating_point_number; +} + +simdjson_inline double number::get_double() const noexcept { + return payload.floating_point_number; +} + +simdjson_inline number::operator double() const noexcept { + return get_double(); +} + +simdjson_inline double number::as_double() const noexcept { + if(is_double()) { + return payload.floating_point_number; + } + if(is_int64()) { + return double(payload.signed_integer); + } + return double(payload.unsigned_integer); +} + +simdjson_inline void number::append_s64(int64_t value) noexcept { + payload.signed_integer = value; + type = number_type::signed_integer; +} + +simdjson_inline void number::append_u64(uint64_t value) noexcept { + payload.unsigned_integer = value; + type = number_type::unsigned_integer; +} + +simdjson_inline void number::append_double(double value) noexcept { + payload.floating_point_number = value; + type = number_type::floating_point_number; +} + +simdjson_inline void number::skip_double() noexcept { + type = number_type::floating_point_number; +} + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson 
+ +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +/* end file simdjson/generic/ondemand/json_type-inl.h for lsx */ +/* including simdjson/generic/ondemand/logger-inl.h for lsx: #include "simdjson/generic/ondemand/logger-inl.h" */ +/* begin file simdjson/generic/ondemand/logger-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace lsx { +namespace ondemand { +namespace logger { + +static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +static constexpr const int LOG_EVENT_LEN = 20; +static constexpr const int LOG_BUFFER_LEN = 30; +static constexpr const int LOG_SMALL_BUFFER_LEN = 10; +static int log_depth = 0; // Not threadsafe. Log only. 
+
+// Helper to turn unprintable or newline characters into spaces.
+// Everything below 0x20 is replaced. Where plain `char` is signed, bytes above
+// 0x7F compare negative and are therefore replaced as well.
+static inline char printable_char(char c) {
+  if (c >= 0x20) {
+    return c;
+  } else {
+    return ' ';
+  }
+}
+
+// printf-style formatting into a std::string.
+//
+// `format` is handed to std::snprintf together with `args`. The call is made
+// twice: once with a null buffer to measure the output, once to write it.
+// Returns the formatted text without the terminating NUL, or an empty string
+// when snprintf reports an error.
+template <typename... Args>
+static inline std::string string_format(const std::string& format, const Args&... args)
+{
+  SIMDJSON_PUSH_DISABLE_ALL_WARNINGS
+  // Measuring pass: number of characters needed, terminating NUL excluded.
+  const int length = std::snprintf(nullptr, 0, format.c_str(), args...);
+  // A negative length signals a formatting error. It has to be tested while the
+  // value is still signed: after conversion to size_t it would become an
+  // enormous allocation size, and adding 1 first could overflow the int.
+  if (length < 0) return std::string();
+  const auto size = static_cast<size_t>(length) + 1; // room for the terminating NUL
+  std::unique_ptr<char[]> buf(new char[size]);
+  std::snprintf(buf.get(), size, format.c_str(), args...);
+  SIMDJSON_POP_DISABLE_WARNINGS
+  return std::string(buf.get(), buf.get() + size - 1);
+}
+
+// Reads the SIMDJSON_LOG_LEVEL environment variable. The value "ERROR"
+// (compared with simdjson_strcasecmp) selects log_level::error; anything else,
+// including an unset variable, selects log_level::info.
+static inline log_level get_log_level_from_env()
+{
+  SIMDJSON_PUSH_DISABLE_WARNINGS
+  SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe
+  char *lvl = getenv("SIMDJSON_LOG_LEVEL");
+  SIMDJSON_POP_DISABLE_WARNINGS
+  if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; }
+  return log_level::info;
+}
+
+// Minimum level that gets printed. The environment is consulted once; the
+// result is kept in a function-local static.
+static inline log_level log_threshold()
+{
+  static log_level threshold = get_log_level_from_env();
+  return threshold;
+}
+
+// True when a message of the given level is at or above the threshold.
+static inline bool should_log(log_level level)
+{
+  return level >= log_threshold();
+}
+
+// The wrappers below forward to log_line at info level with an empty title prefix.
+inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept {
+  log_line(iter, "", type, detail, delta, depth_delta, log_level::info);
+}
+
+inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept {
+  log_line(iter, index, depth, "", type, detail, log_level::info);
+}
+inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept {
+  log_line(iter, "", type, detail, delta, depth_delta, log_level::info);
+}
+
+inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type,
std::string_view detail) noexcept { + log_line(iter, index, depth, "+", type, detail, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} +inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_line(iter, "+", type, "", delta, depth_delta, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} + +inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + if (LOG_ENABLED) { log_depth--; } + log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +} + +inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); +} +inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { + log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); +} + +inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_event(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_value(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_start_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_end_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_error(iter.json_iter(), error, detail, delta, depth_delta); +} + +inline void 
log_headers() noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(log_level::info))) { + // Technically a static variable is not thread-safe, but if you are using threads and logging... well... + static bool displayed_hint{false}; + log_depth = 0; + printf("\n"); + if (!displayed_hint) { + // We only print this helpful header once. + printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); + printf("# +array says 'this is where we were when we discovered the start array'\n"); + printf( + "# -array says 'this is where we were when we ended the array'\n"); + printf("# skip says 'this is a structural or value I am skipping'\n"); + printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); + printf("#\n"); + printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); + printf("# in addition to the depth being displayed.\n"); + printf("#\n"); + printf("# Every token in the document has a single depth determined by the tokens before it,\n"); + printf("# and is not affected by what the token actually is.\n"); + printf("#\n"); + printf("# Not all structural elements are presented as tokens in the logs.\n"); + printf("#\n"); + printf("# We never give control to the user within an empty array or an empty object.\n"); + printf("#\n"); + printf("# Inside an array, having a depth greater than the array's depth means that\n"); + printf("# we are pointing inside a value.\n"); + printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); + printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); + displayed_hint = true; + } + printf("\n"); + printf("| %-*s ", LOG_EVENT_LEN, "Event"); + printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); + printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); + // printf("| %-*s ", 5, "Next#"); + printf("| %-*s ", 5, "Depth"); + printf("| Detail "); + printf("|\n"); + + printf("|%.*s", 
LOG_EVENT_LEN + 2, DASHES); + printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); + printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); + // printf("|%.*s", 5+2, DASHES); + printf("|%.*s", 5 + 2, DASHES); + printf("|--------"); + printf("|\n"); + fflush(stdout); + } + } +} + +template +inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { + log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); +} + +template +inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(level))) { + const int indent = depth * 2; + const auto buf = iter.token.buf; + auto msg = string_format(title, std::forward(args)...); + printf("| %*s%s%-*s ", indent, "", title_prefix, + LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); + { + // Print the current structural. + printf("| "); + // Before we begin, the index might point right before the document. + // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 + if (index < iter._root) { + printf("%*s", LOG_BUFFER_LEN, ""); + } else { + auto current_structural = &buf[*index]; + for (int i = 0; i < LOG_BUFFER_LEN; i++) { + printf("%c", printable_char(current_structural[i])); + } + } + printf(" "); + } + { + // Print the next structural. 
+ printf("| "); + auto next_structural = &buf[*(index + 1)]; + for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { + printf("%c", printable_char(next_structural[i])); + } + printf(" "); + } + // printf("| %5u ", *(index+1)); + printf("| %5i ", depth); + printf("| %6.*s ", int(detail.size()), detail.data()); + printf("|\n"); + fflush(stdout); + } + } +} + +} // namespace logger +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +/* end file simdjson/generic/ondemand/logger-inl.h for lsx */ +/* including simdjson/generic/ondemand/object-inl.h for lsx: #include "simdjson/generic/ondemand/object-inl.h" */ +/* begin file simdjson/generic/ondemand/object-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace ondemand { + +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + 
logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { + return find_field_unordered(key); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { + return std::forward(*this).find_field_unordered(key); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} + +simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_object().error() ); + return object(iter); +} +simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_root_object().error() ); 
+ return object(iter); +} +simdjson_inline error_code object::consume() noexcept { + if(iter.is_at_key()) { + /** + * whenever you are pointing at a key, calling skip_child() is + * unsafe because you will hit a string and you will assume that + * it is string value, and this mistake will lead you to make bad + * depth computation. + */ + /** + * We want to 'consume' the key. We could really + * just do _json_iter->return_current_and_advance(); at this + * point, but, for clarity, we will use the high-level API to + * eat the key. We assume that the compiler optimizes away + * most of the work. + */ + simdjson_unused raw_json_string actual_key; + auto error = iter.field_key().get(actual_key); + if (error) { iter.abandon(); return error; }; + // Let us move to the value while we are at it. + if ((error = iter.field_value())) { iter.abandon(); return error; } + } + auto error_skip = iter.json_iter().skip_child(iter.depth()-1); + if(error_skip) { iter.abandon(); } + return error_skip; +} + +simdjson_inline simdjson_result object::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.started_object().error() ); + return object(iter); +} + +simdjson_inline object object::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline object::object(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result object::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return object_iterator(iter); +} +simdjson_inline simdjson_result object::end() noexcept { + return object_iterator(iter); +} + +inline 
simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = find_field(unescaped); // Take note find_field does not unescape keys when matching + } else { + child = find_field(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; +} + +inline simdjson_result object::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { + return INVALID_JSON_POINTER; + } + return at_pointer(json_pointer); +} + +simdjson_inline simdjson_result object::count_fields() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. 
+ if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the object after counting the number of elements. + iter.reset_object(); + return count; +} + +simdjson_inline simdjson_result object::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_object().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +simdjson_inline simdjson_result object::reset() & noexcept { + return iter.reset_object(); +} + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first)[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); 
} + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +inline simdjson_result simdjson_result::reset() noexcept { + if (error()) { return error(); } + return first.reset(); +} + +inline simdjson_result simdjson_result::is_empty() noexcept { + if (error()) { return error(); } + return first.is_empty(); +} + +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +/* end file simdjson/generic/ondemand/object-inl.h for lsx */ +/* including simdjson/generic/ondemand/object_iterator-inl.h for lsx: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/object_iterator-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ +/* 
amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace ondemand { + +// +// object_iterator +// + +simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result object_iterator::operator*() noexcept { + error_code error = iter.error(); + if (error) { iter.abandon(); return error; } + auto result = field::start(iter); + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (result.error()) { iter.abandon(); } + return result; +} +simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { + return iter.is_open(); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline object_iterator &object_iterator::operator++() noexcept { + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error + + simdjson_unused error_code error; + if ((error = iter.skip_child() )) { return *this; } + + simdjson_unused bool has_value; + if ((error = iter.has_next_field().get(has_value) )) { return *this; }; + return *this; +} +SIMDJSON_POP_DISABLE_WARNINGS + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter.depth. at_start may vary. 
Error is +// always SUCCESS: +// +// - Start: This is the state when the object is first found and the iterator is just past the {. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the , or } before the next value. In this state, +// depth == iter.depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter.depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the object iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an +// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter.depth == depth, and at_start == false. +// +// Errors that occur while reading a field to give to the user (such as when the key is not a +// string or the field is missing a colon) are yielded immediately. Depth is then decremented, +// moving to the Finished state without transitioning through an Error state at all. +// +// ## Terminal State +// +// The terminal state has iter.depth < depth. at_start is always false. 
+// +// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. +// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. +// + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + lsx::ondemand::object_iterator &&value +) noexcept + : implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +// If we're iterating and there is an error, return the error once. 
+simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +// Checks for ']' and ',' +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++first; + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/object_iterator-inl.h for lsx */ +/* including simdjson/generic/ondemand/parser-inl.h for lsx: #include "simdjson/generic/ondemand/parser-inl.h" */ +/* begin file simdjson/generic/ondemand/parser-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace ondemand { + +simdjson_inline parser::parser(size_t 
max_capacity) noexcept + : _max_capacity{max_capacity} { +} + +simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + if (new_capacity > max_capacity()) { return CAPACITY; } + if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + + // string_capacity copied from document::allocate + _capacity = 0; + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); +#if SIMDJSON_DEVELOPMENT_CHECKS + start_positions.reset(new (std::nothrow) token_position[new_max_depth]); +#endif + if (implementation) { + SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); + SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + } else { + SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); + } + _capacity = new_capacity; + _max_depth = new_max_depth; + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. 
+ SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return document::start({ reinterpret_cast(json.data()), this }); +} + +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; + } + return document::start({ reinterpret_cast(json.data()), this, true }); +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { + return iterate(padded_string_view(json, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { + if(json.capacity() - json.size() < SIMDJSON_PADDING) { + json.reserve(json.size() + SIMDJSON_PADDING); + } + return iterate(padded_string_view(json)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { + return iterate(padded_string_view(json)); +} + +simdjson_warn_unused simdjson_inline simdjson_result 
parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + padded_string_view json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + const padded_string &json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length()) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return json_iterator(reinterpret_cast(json.data()), this); +} + +inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { + buf += 3; + len -= 3; + } + if(allow_comma_separated && batch_size < len) { batch_size = len; } + return document_stream(*this, buf, len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return 
iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} + +simdjson_inline size_t parser::capacity() const noexcept { + return _capacity; +} +simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_inline size_t parser::max_depth() const noexcept { + return _max_depth; +} + +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; + } +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { + uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { + uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H +/* end file simdjson/generic/ondemand/parser-inl.h for lsx */ +/* including 
simdjson/generic/ondemand/raw_json_string-inl.h for lsx: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +namespace lsx { +namespace ondemand { + +simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} + +simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } + + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;pos < target.size() && target[pos] != '\\';pos++) {} + // slow path may begin. + bool escaping{false}; + for(;pos < target.size();pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;target[pos] && target[pos] != '\\';pos++) {} + // slow path may begin. 
+ bool escaping{false}; + for(;target[pos];pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { + // If we are going to call memcmp, then we must know something about the length of the raw_json_string. + return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); +} + +simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + if(target.size() <= SIMDJSON_PADDING) { + return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + } + const char * r{raw()}; + size_t pos{0}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. 
+ return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { + // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. 
+ return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { + return a.unsafe_is_equal(c); +} + +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { + return a == c; +} + +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { + return !(a == c); +} + +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { + return !(a == c); +} + + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { + return iter.unescape(*this, allow_replacement); +} + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { + return iter.unescape_wobbly(*this); +} + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { + bool in_escape = false; + const char *s = str.raw(); + while (true) { + switch (*s) { + case '\\': in_escape = !in_escape; break; + case '"': if (in_escape) { in_escape = false; } else { return out; } break; + default: if (in_escape) { in_escape = false; } + } + out << *s; + s++; + } +} + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { + if (error()) { return error(); } + return first.raw(); +} 
+simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(lsx::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { + if (error()) { return error(); } + return first.unescape(iter, allow_replacement); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(lsx::ondemand::json_iterator &iter) const noexcept { + if (error()) { return error(); } + return first.unescape_wobbly(iter); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H +/* end file simdjson/generic/ondemand/raw_json_string-inl.h for lsx */ +/* including simdjson/generic/ondemand/serialization-inl.h for lsx: #include "simdjson/generic/ondemand/serialization-inl.h" */ +/* begin file simdjson/generic/ondemand/serialization-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +inline std::string_view trim(const std::string_view str) noexcept { + // We can almost surely do better by rolling our own find_first_not_of function. 
+ size_t first = str.find_first_not_of(" \t\n\r"); + // If we have the empty string (just white space), then no trimming is possible, and + // we return the empty string_view. + if (std::string_view::npos == first) { return std::string_view(); } + size_t last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, (last - first + 1)); +} + + +inline simdjson_result to_json_string(lsx::ondemand::document& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(lsx::ondemand::document_reference& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(lsx::ondemand::value& x) noexcept { + /** + * If we somehow receive a value that has already been consumed, + * then the following code could be in trouble. E.g., we create + * an array as needed, but if an array was already created, then + * it could be bad. 
+ */ + using namespace lsx::ondemand; + lsx::ondemand::json_type t; + auto error = x.type().get(t); + if(error != SUCCESS) { return error; } + switch (t) + { + case json_type::array: + { + lsx::ondemand::array array; + error = x.get_array().get(array); + if(error) { return error; } + return to_json_string(array); + } + case json_type::object: + { + lsx::ondemand::object object; + error = x.get_object().get(object); + if(error) { return error; } + return to_json_string(object); + } + default: + return trim(x.raw_json_token()); + } +} + +inline simdjson_result to_json_string(lsx::ondemand::object& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(lsx::ondemand::array& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} +} // namespace simdjson + +namespace simdjson { namespace lsx { namespace ondemand { + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw 
simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::document_reference& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw 
simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif +}}} // namespace simdjson::lsx::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +/* end file simdjson/generic/ondemand/serialization-inl.h for lsx */ +/* including simdjson/generic/ondemand/token_iterator-inl.h for lsx: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/token_iterator-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace ondemand { + +simdjson_inline token_iterator::token_iterator( + const uint8_t *_buf, + token_position position +) noexcept : buf{_buf}, _position{position} +{ +} + +simdjson_inline uint32_t token_iterator::current_offset() const noexcept { + return *(_position); +} + + +simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { + return &buf[*(_position++)]; +} + +simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; +} +simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; +} +simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { + return *(position+1) - *position; +} + +simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { + return *(position+2) - *(position) > *(position+1) - *(position) ? 
+ *(position+1) - *(position) + : *(position+2) - *(position); +} +simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(_position+delta)]; +} +simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(_position+delta); +} +simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + return *(_position+delta+1) - *(_position+delta); +} + +simdjson_inline token_position token_iterator::position() const noexcept { + return _position; +} +simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; +} + +simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return _position == other._position; +} +simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return _position != other._position; +} +simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return _position > other._position; +} +simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return _position >= other._position; +} +simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return _position < other._position; +} +simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return _position <= other._position; +} + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H +/* end file 
simdjson/generic/ondemand/token_iterator-inl.h for lsx */ +/* including simdjson/generic/ondemand/value-inl.h for lsx: #include "simdjson/generic/ondemand/value-inl.h" */ +/* begin file simdjson/generic/ondemand/value-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace ondemand { + +simdjson_inline value::value(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} +simdjson_inline value value::start(const value_iterator &iter) noexcept { + return iter; +} +simdjson_inline value value::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline simdjson_result value::get_array() noexcept { + return array::start(iter); +} +simdjson_inline simdjson_result value::get_object() 
noexcept { + return object::start(iter); +} +simdjson_inline simdjson_result value::start_or_resume_object() noexcept { + if (iter.at_start()) { + return get_object(); + } else { + return object::resume(iter); + } +} + +simdjson_inline simdjson_result value::get_raw_json_string() noexcept { + return iter.get_raw_json_string(); +} +simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { + return iter.get_string(allow_replacement); +} +template +simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { + return iter.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result value::get_wobbly_string() noexcept { + return iter.get_wobbly_string(); +} +simdjson_inline simdjson_result value::get_double() noexcept { + return iter.get_double(); +} +simdjson_inline simdjson_result value::get_double_in_string() noexcept { + return iter.get_double_in_string(); +} +simdjson_inline simdjson_result value::get_uint64() noexcept { + return iter.get_uint64(); +} +simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { + return iter.get_uint64_in_string(); +} +simdjson_inline simdjson_result value::get_int64() noexcept { + return iter.get_int64(); +} +simdjson_inline simdjson_result value::get_int64_in_string() noexcept { + return iter.get_int64_in_string(); +} +simdjson_inline simdjson_result value::get_bool() noexcept { + return iter.get_bool(); +} +simdjson_inline simdjson_result value::is_null() noexcept { + return iter.is_null(); +} +template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result value::get() noexcept { 
return get_number(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } + +template simdjson_inline error_code value::get(T &out) noexcept { + return get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline value::operator T() noexcept(false) { + return get(); +} +simdjson_inline value::operator array() noexcept(false) { + return get_array(); +} +simdjson_inline value::operator object() noexcept(false) { + return get_object(); +} +simdjson_inline value::operator uint64_t() noexcept(false) { + return get_uint64(); +} +simdjson_inline value::operator int64_t() noexcept(false) { + return get_int64(); +} +simdjson_inline value::operator double() noexcept(false) { + return get_double(); +} +simdjson_inline value::operator std::string_view() noexcept(false) { + return get_string(false); +} +simdjson_inline value::operator raw_json_string() noexcept(false) { + return get_raw_json_string(); +} +simdjson_inline value::operator bool() noexcept(false) { + return get_bool(); +} +#endif + +simdjson_inline simdjson_result value::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result value::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result value::count_elements() & noexcept { + simdjson_result answer; + auto a = get_array(); + answer = a.count_elements(); + // count_elements leaves you pointing inside the array, at the first element. + // We need to move back so that the user can create a new array (which requires that + // we point at '['). 
+ iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::count_fields() & noexcept { + simdjson_result answer; + auto a = get_object(); + answer = a.count_fields(); + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::at(size_t index) noexcept { + auto a = get_array(); + return a.at(index); +} + +simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result value::find_field(const char *key) noexcept { + return start_or_resume_object().find_field(key); +} + +simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} + +simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result value::operator[](const char *key) noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline simdjson_result value::type() noexcept { + return iter.type(); +} + +simdjson_inline simdjson_result value::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! 
((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline simdjson_result value::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + + +simdjson_inline bool value::is_negative() noexcept { + return iter.is_negative(); +} + +simdjson_inline simdjson_result value::is_integer() noexcept { + return iter.is_integer(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { + return iter.get_number_type(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { + return iter.get_number(); +} + +simdjson_inline std::string_view value::raw_json_token() noexcept { + return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); +} + +simdjson_inline simdjson_result value::raw_json() noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: { + ondemand::array array; + SIMDJSON_TRY(get_array().get(array)); + return array.raw_json(); + } + case json_type::object: { + ondemand::object object; + SIMDJSON_TRY(get_object().get(object)); + return object.raw_json(); + } + default: + return raw_json_token(); + } +} + +simdjson_inline simdjson_result value::current_location() noexcept { + return iter.json_iter().current_location(); +} + +simdjson_inline int32_t value::current_depth() const noexcept{ + return iter.json_iter().depth(); +} + +inline bool is_pointer_well_formed(std::string_view json_pointer) noexcept { + if (simdjson_unlikely(json_pointer.empty())) { // can't be + return false; + } + if (simdjson_unlikely(json_pointer[0] != '/')) { + return false; + } + size_t escape = json_pointer.find('~'); + if (escape == std::string_view::npos) { + return true; + } + if (escape == json_pointer.size() - 1) { + return false; + } + if (json_pointer[escape + 1] != '0' && json_pointer[escape + 1] != '1') { + 
return false; + } + return true; +} + +simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + // a non-empty string can be invalid, or accessing a primitive (issue 2154) + if (is_pointer_well_formed(json_pointer)) { + return NO_SUCH_FIELD; + } + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result value::at_path(std::string_view json_path) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + lsx::ondemand::value &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + if (error()) { return error(); } + return {}; +} 
+ +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} + +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { + if (error()) { return error(); } + return first[key]; +} + +simdjson_inline simdjson_result simdjson_result::get_array() noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return 
first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return first.get(); +} +template simdjson_inline error_code simdjson_result::get(T &out) noexcept { + if (error()) { return error(); } + return first.get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return std::move(first); +} +template<> simdjson_inline error_code simdjson_result::get(lsx::ondemand::value &out) noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } 
+ return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return static_cast(first); +} +simdjson_inline simdjson_result::operator lsx::ondemand::array() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lsx::ondemand::object() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lsx::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline 
simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer( + std::string_view json_pointer) noexcept { + if (error()) { + return error(); + } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H +/* end file simdjson/generic/ondemand/value-inl.h for lsx */ +/* including simdjson/generic/ondemand/value_iterator-inl.h for lsx: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/value_iterator-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace ondemand { + +simdjson_inline value_iterator::value_iterator( + json_iterator *json_iter, + depth_t depth, + token_position start_position +) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} +{ +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_root_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { + assert_at_container_start(); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + if (*_json_iter->peek() == '}') { + logger::log_value(*_json_iter, "empty object"); + _json_iter->return_current_and_advance(); + end_container(); + return false; + } + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! 
_json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should + // call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != '}') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); + } + // If the last character is } *and* the first gibberish character is also '}' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed object. 
+ return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { + auto error = check_root_object(); + if(error) { return error; } + return started_object(); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { +#if SIMDJSON_CHECK_EOF + if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } + // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + _json_iter->ascend_to(depth()-1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { + assert_at_next(); + + // It's illegal to call this unless there are more tokens: anything that ends in } or ] is + // obligated to verify there are more tokens if they are not the top level. + switch (*_json_iter->return_current_and_advance()) { + case '}': + logger::log_end_value(*_json_iter, "object"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between object fields"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { + error_code error; + bool has_value; + // + // Initially, the object can be in one of a few different places: + // + // 1. The start of the object, at the first field: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + if (at_first_field()) { + has_value = true; + + // + // 2. 
When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + return false; + + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + if ((error = skip_child() )) { abandon(); return error; } + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + while (has_value) { + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. 
+ if ((error = field_value() )) { abandon(); return error; } + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + //if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); // Skip the value entirely + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + + // If the loop ended, we're out of fields to look at. 
+ return false; +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { + /** + * When find_field_unordered_raw is called, we can either be pointing at the + * first key, pointing outside (at the closing brace) or if a key was matched + * we can be either pointing right afterthe ':' right before the value (that we need skip), + * or we may have consumed the value and we might be at a comma or at the + * final brace (ready for a call to has_next_field()). + */ + error_code error; + bool has_value; + + // First, we scan from that point to the end. + // If we don't find a match, we may loop back around, and scan from the beginning to that point. + token_position search_start = _json_iter->position(); + + // We want to know whether we need to go back to the beginning. + bool at_first = at_first_field(); + /////////////// + // Initially, the object can be in one of a few different places: + // + // 1. At the first key: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + // + if (at_first) { + has_value = true; + + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { + +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + SIMDJSON_TRY(reset_object().get(has_value)); + at_first = true; + // 3. 
When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + // If someone queried a key but they not did access the value, then we are left pointing + // at the ':' and we need to move forward through the value... If the value was + // processed then skip_child() does not move the iterator (but may adjust the depth). + if ((error = skip_child() )) { abandon(); return error; } + search_start = _json_iter->position(); + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + + // After initial processing, we will be in one of two states: + // + // ``` + // // At the beginning of a field + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // At the end of the object + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // ``` + // + // Next, we find a match starting from the current position. + while (has_value) { + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). 
+ if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + // Performance note: it maybe wasteful to rewind to the beginning when there might be + // no other query following. Indeed, it would require reskipping the whole object. + // Instead, you can just stay where you are. If there is a new query, there is always time + // to rewind. + if(at_first) { return false; } + + // If we reach the end without finding a match, search the rest of the fields starting at the + // beginning of the object. 
+ // (We have already run through the object before, so we've already validated its structure. We + // don't check errors in this bit.) + SIMDJSON_TRY(reset_object().get(has_value)); + while (true) { + SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + error = field_value(); SIMDJSON_ASSUME(!error); + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). 
+ SIMDJSON_TRY( skip_child() ); + // If we reached the end of the key-value pair we started from, then we know + // that the key is not there so we return false. We are either right before + // the next comma or the final brace. + if(_json_iter->position() == search_start) { return false; } + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); + // If we make the mistake of exiting here, then we could be left pointing at a key + // in the middle of an object. That's not an allowable state. + } + // If the loop ended, we're out of fields to look at. The program should + // never reach this point. + return false; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { + assert_at_next(); + + const uint8_t *key = _json_iter->return_current_and_advance(); + if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } + return raw_json_string(key); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { + assert_at_next(); + + if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } + _json_iter->descend_to(depth()+1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_root_array(); +} + +inline std::string value_iterator::to_string() const noexcept { + auto answer = 
std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); + if(_json_iter != nullptr) { answer += _json_iter->to_string(); } + answer += std::string(" ]"); + return answer; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { + assert_at_container_start(); + if (*_json_iter->peek() == ']') { + logger::log_value(*_json_iter, "empty array"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY( end_container() ); + return false; + } + _json_iter->descend_to(depth()+1); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should + // also call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != ']') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); + } + // If the last character is ] *and* the first gibberish character is also ']' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. 
+ if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed array. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { + auto error = check_root_array(); + if (error) { return error; } + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { + assert_at_next(); + + logger::log_event(*this, "has_next_element"); + switch (*_json_iter->return_current_and_advance()) { + case ']': + logger::log_end_value(*_json_iter, "array"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + _json_iter->descend_to(depth()+1); + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between array elements"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { + auto not_true = atomparsing::str4ncmp(json, "true"); + auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); + bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 
5 : 4]); + if (error) { return incorrect_type_error("Not a boolean"); } + return simdjson_result(!not_true); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { + bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); + // if we start with 'n', we must be a null + if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } + return is_null_string; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { + return get_raw_json_string().unescape(json_iter(), allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_string(allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { + return get_raw_json_string().unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { + auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { + auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return 
result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { + auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { + auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { + auto result = numberparsing::parse_double(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { + auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { + auto result = parse_bool(peek_non_root_scalar("bool")); + if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } + return result; +} +simdjson_inline simdjson_result value_iterator::is_null() noexcept { + bool is_null_value; + SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); + if(is_null_value) { advance_non_root_scalar("null"); } + return is_null_value; +} +simdjson_inline bool value_iterator::is_negative() noexcept { + return numberparsing::is_negative(peek_non_root_scalar("numbersign")); +} +simdjson_inline bool value_iterator::is_root_negative() noexcept { + return numberparsing::is_negative(peek_root_scalar("numbersign")); +} +simdjson_inline simdjson_result value_iterator::is_integer() noexcept { + return 
numberparsing::is_integer(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { + return numberparsing::get_number_type(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number() noexcept { + number num; + error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); + if(error) { return error; } + return num; +} + +simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("is_root_integer"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + return false; // if there are more than 20 characters, it cannot be represented as an integer. + } + auto answer = numberparsing::is_integer(tmpbuf); + // If the parsing was a success, we must still check that it is + // a single scalar. Note that we parse first because of cases like '[]' where + // getting TRAILING_CONTENT is wrong. + if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } + return answer; +} + +simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return number_type::big_integer; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; + } + auto answer = numberparsing::get_number_type(tmpbuf); + if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + return answer; +} +simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return BIGINT_ERROR; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; + } + number num; + error_code error = numberparsing::parse_number(tmpbuf, num); + if(error) { return error; } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("number"); + return num; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { + return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_root_string(check_trailing, allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { + return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) 
noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("bool"); + uint8_t tmpbuf[5+1+1]; // +1 for null termination + tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } + auto result = parse_bool(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + } + return result; +} +simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("null"); + bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + if(result) { // we have something that looks like a null. + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("null"); + } + return result; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); + + return _json_iter->skip_child(depth()); +} + +simdjson_inline value_iterator value_iterator::child() const noexcept { + assert_at_child(); + return { _json_iter, depth()+1, _json_iter->token.position() }; +} + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is +// marked non-inline. 
+SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline bool value_iterator::is_open() const noexcept { + return _json_iter->depth() >= depth(); +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool value_iterator::at_end() const noexcept { + return _json_iter->at_end(); +} + +simdjson_inline bool value_iterator::at_start() const noexcept { + return _json_iter->token.position() == start_position(); +} + +simdjson_inline bool value_iterator::at_first_field() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + return _json_iter->token.position() == start_position() + 1; +} + +simdjson_inline void value_iterator::abandon() noexcept { + _json_iter->abandon(); +} + +simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { + return _depth; +} +simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { + return _json_iter->error; +} +simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { + return _json_iter->string_buf_loc(); +} +simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { + return *_json_iter; +} +simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { + return *_json_iter; +} + +simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { + return _json_iter->peek(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { + return _json_iter->peek_length(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { + return _json_iter->peek_root_length(start_position()); +} + +simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. 
+ if (!is_at_start()) { return peek_start(); } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return; } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { + logger::log_start_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + const uint8_t *json; + if (!is_at_start()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + json = peek_start(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + } else { + assert_at_start(); + /** + * We should be prudent. Let us peek. If it is not the right type, we + * return an error. Only once we have determined that we have the right + * type are we allowed to advance! 
+ */ + json = _json_iter->peek(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + _json_iter->return_current_and_advance(); + } + + + return SUCCESS; +} + + +simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_root(); + return _json_iter->peek(); +} +simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_non_root_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_root(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} +simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_non_root_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { + logger::log_error(*_json_iter, start_position(), depth(), message); + return INCORRECT_TYPE; +} + +simdjson_inline bool value_iterator::is_at_start() const noexcept { + return position() == start_position(); +} + +simdjson_inline bool value_iterator::is_at_key() const noexcept { + // Keys are at the same depth as the object. + // Note here that we could be safer and check that we are within an object, + // but we do not. 
+ return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; +} + +simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { + // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). + auto delta = position() - start_position(); + return delta == 1 || delta == 2; +} + +inline void value_iterator::assert_at_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_container_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_next() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +simdjson_inline void value_iterator::move_at_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position); +} + +simdjson_inline void value_iterator::move_at_container_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position + 1); +} + +simdjson_inline simdjson_result value_iterator::reset_array() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_array(); +} + +simdjson_inline simdjson_result value_iterator::reset_object() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_object(); +} + +inline void value_iterator::assert_at_child() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_root() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( 
_depth == 1 ); +} + +inline void value_iterator::assert_at_non_root_start() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth > 1 ); +} + +inline void value_iterator::assert_is_valid() const noexcept { + SIMDJSON_ASSUME( _json_iter != nullptr ); +} + +simdjson_inline bool value_iterator::is_valid() const noexcept { + return _json_iter != nullptr; +} + +simdjson_inline simdjson_result value_iterator::type() const noexcept { + switch (*peek_start()) { + case '{': + return json_type::object; + case '[': + return json_type::array; + case '"': + return json_type::string; + case 'n': + return json_type::null; + case 't': case 'f': + return json_type::boolean; + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return json_type::number; + default: + return TAPE_ERROR; + } +} + +simdjson_inline token_position value_iterator::start_position() const noexcept { + return _start_position; +} + +simdjson_inline token_position value_iterator::position() const noexcept { + return _json_iter->position(); +} + +simdjson_inline token_position value_iterator::end_position() const noexcept { + return _json_iter->end_position(); +} + +simdjson_inline token_position value_iterator::last_position() const noexcept { + return _json_iter->last_position(); +} + +simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { + return _json_iter->report_error(error, message); +} + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +/* end file 
simdjson/generic/ondemand/value_iterator-inl.h for lsx */ +/* end file simdjson/generic/ondemand/amalgamated.h for lsx */ +/* including simdjson/lsx/end.h: #include "simdjson/lsx/end.h" */ +/* begin file simdjson/lsx/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "lsx" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/lsx/end.h */ + +#endif // SIMDJSON_LSX_ONDEMAND_H +/* end file simdjson/lsx/ondemand.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(lasx) +/* including simdjson/lasx/ondemand.h: #include "simdjson/lasx/ondemand.h" */ +/* begin file simdjson/lasx/ondemand.h */ +#ifndef SIMDJSON_LASX_ONDEMAND_H +#define SIMDJSON_LASX_ONDEMAND_H + +/* including simdjson/lasx/begin.h: #include "simdjson/lasx/begin.h" */ +/* begin file simdjson/lasx/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "lasx" */ +#define SIMDJSON_IMPLEMENTATION lasx +/* including simdjson/lasx/base.h: #include "simdjson/lasx/base.h" */ +/* begin file simdjson/lasx/base.h */ +#ifndef SIMDJSON_LASX_BASE_H +#define SIMDJSON_LASX_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Implementation for LASX. 
+ */ +namespace lasx { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_BASE_H +/* end file simdjson/lasx/base.h */ +/* including simdjson/lasx/intrinsics.h: #include "simdjson/lasx/intrinsics.h" */ +/* begin file simdjson/lasx/intrinsics.h */ +#ifndef SIMDJSON_LASX_INTRINSICS_H +#define SIMDJSON_LASX_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for LoongArch ASX"); + +#endif // SIMDJSON_LASX_INTRINSICS_H +/* end file simdjson/lasx/intrinsics.h */ +/* including simdjson/lasx/bitmanipulation.h: #include "simdjson/lasx/bitmanipulation.h" */ +/* begin file simdjson/lasx/bitmanipulation.h */ +#ifndef SIMDJSON_LASX_BITMANIPULATION_H +#define SIMDJSON_LASX_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/bitmask.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. 
+SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { + return __builtin_ctzll(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return __builtin_clzll(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int count_ones(uint64_t input_num) { + return __lasx_xvpickve2gr_w(__lasx_xvpcnt_d(__m256i(v4u64{input_num, 0, 0, 0})), 0); +} + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +} + +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_BITMANIPULATION_H +/* end file simdjson/lasx/bitmanipulation.h */ +/* including simdjson/lasx/bitmask.h: #include "simdjson/lasx/bitmask.h" */ +/* begin file simdjson/lasx/bitmask.h */ +#ifndef SIMDJSON_LASX_BITMASK_H +#define SIMDJSON_LASX_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. 
+// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif +/* end file simdjson/lasx/bitmask.h */ +/* including simdjson/lasx/numberparsing_defs.h: #include "simdjson/lasx/numberparsing_defs.h" */ +/* begin file simdjson/lasx/numberparsing_defs.h */ +#ifndef SIMDJSON_LASX_NUMBERPARSING_DEFS_H +#define SIMDJSON_LASX_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace lasx { +namespace numberparsing { + +// we don't have appropriate instructions, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); + return answer; +} + +} // namespace numberparsing +} // namespace lasx +} // namespace simdjson + 
+#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_LASX_NUMBERPARSING_DEFS_H +/* end file simdjson/lasx/numberparsing_defs.h */ +/* including simdjson/lasx/simd.h: #include "simdjson/lasx/simd.h" */ +/* begin file simdjson/lasx/simd.h */ +#ifndef SIMDJSON_LASX_SIMD_H +#define SIMDJSON_LASX_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m256i value; + + // Zero constructor + simdjson_inline base() : value{__m256i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m256i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m256i&() const { return this->value; } + simdjson_inline operator __m256i&() { return this->value; } + simdjson_inline operator const v32i8&() const { return (v32i8&)this->value; } + simdjson_inline operator v32i8&() { return (v32i8&)this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return __lasx_xvor_v(*this, other); } + simdjson_inline Child operator&(const Child other) const { return __lasx_xvand_v(*this, other); } + simdjson_inline Child operator^(const Child other) const { return __lasx_xvxor_v(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return __lasx_xvandn_v(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& 
operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. + template + struct simd8; + + template> + struct base8: base> { + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m256i _value) : base>(_value) {} + + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return __lasx_xvseq_b(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + __m256i hi = __lasx_xvbsll_v(*this, N); + __m256i lo = __lasx_xvbsrl_v(*this, 16 - N); + __m256i tmp = __lasx_xvbsrl_v(prev_chunk, 16 - N); + lo = __lasx_xvpermi_q(lo, tmp, 0x21); + return __lasx_xvor_v(hi, lo); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return __lasx_xvreplgr2vr_b(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m256i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { + __m256i mask = __lasx_xvmskltz_b(*this); + return (__lasx_xvpickve2gr_w(mask, 4) << 16) | (__lasx_xvpickve2gr_w(mask, 0)); + } + simdjson_inline bool any() const { + __m256i v = __lasx_xvmsknz_b(*this); + return (0 == __lasx_xvpickve2gr_w(v, 0)) && (0 == __lasx_xvpickve2gr_w(v, 4)); + } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { + return __lasx_xvreplgr2vr_b(_value); + } + static simdjson_inline simd8 zero() { return __lasx_xvldi(0); } + static 
simdjson_inline simd8 load(const T values[32]) { + return __lasx_xvld(reinterpret_cast(values), 0); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m256i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[32]) const { + return __lasx_xvst(*this, reinterpret_cast<__m256i *>(dst), 0); + } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return __lasx_xvadd_b(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return __lasx_xvsub_b(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return __lasx_xvshuf_b(lookup_table, lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. 
+ template + simdjson_inline void compress(uint32_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by haswell + // lasx do it in 4 steps, first 8 bytes and then second 8 bytes... + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second significant 8 bits + uint8_t mask3 = uint8_t(mask >> 16); // ... + uint8_t mask4 = uint8_t(mask >> 24); // ... + // next line just loads the 64-bit values thintable_epi8[mask{1,2,3,4}] + // into a 256-bit register. + __m256i shufmask = {int64_t(thintable_epi8[mask1]), int64_t(thintable_epi8[mask2]) + 0x0808080808080808, int64_t(thintable_epi8[mask3]), int64_t(thintable_epi8[mask4]) + 0x0808080808080808}; + // this is the version "nearly pruned" + __m256i pruned = __lasx_xvshuf_b(*this, *this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + int pop2 = BitsSetTable256mul2[mask2]; + int pop3 = BitsSetTable256mul2[mask3]; + + // then load the corresponding mask + __m256i masklo = __lasx_xvldx(reinterpret_cast(reinterpret_cast(pshufb_combine_table)), pop1 * 8); + __m256i maskhi = __lasx_xvldx(reinterpret_cast(reinterpret_cast(pshufb_combine_table)), pop3 * 8); + __m256i compactmask = __lasx_xvpermi_q(maskhi, masklo, 0x20); + __m256i answer = __lasx_xvshuf_b(pruned, pruned, compactmask); + __lasx_xvst(answer, reinterpret_cast(output), 0); + uint64_t value3 = __lasx_xvpickve2gr_du(answer, 2); + uint64_t value4 = __lasx_xvpickve2gr_du(answer, 3); + uint64_t *pos = reinterpret_cast(reinterpret_cast(output) + 16 - (pop1 + pop2) / 2); + pos[0] = value3; + pos[1] = value4; + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L 
replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 + ) : simd8({ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + }) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return __lasx_xvmax_b(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lasx_xvmin_b(*this, other); } + 
simdjson_inline simd8 operator>(const simd8 other) const { return __lasx_xvslt_b(other, *this); } + simdjson_inline simd8 operator<(const simd8 other) const { return __lasx_xvslt_b(*this, other); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 + ) : simd8(__m256i(v32u8{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + })) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return __lasx_xvsadd_bu(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return __lasx_xvssub_bu(*this, other); } + + // Order-specific operations + 
simdjson_inline simd8 max_val(const simd8 other) const { return __lasx_xvmax_bu(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lasx_xvmin_bu(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { + __m256i mask = __lasx_xvmskltz_b(*this); + return (0 == __lasx_xvpickve2gr_w(mask, 0)) && (0 == __lasx_xvpickve2gr_w(mask, 4)); + } + simdjson_inline bool bits_not_set_anywhere() const { + __m256i v = __lasx_xvmsknz_b(*this); + return (0 == __lasx_xvpickve2gr_w(v, 0)) && (0 == __lasx_xvpickve2gr_w(v, 4)); + } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { + __m256i v = __lasx_xvmsknz_b(__lasx_xvand_v(*this, bits)); + return (0 == __lasx_xvpickve2gr_w(v, 0)) && (0 == __lasx_xvpickve2gr_w(v, 4)); + } + 
simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(__lasx_xvsrli_b(*this, N)); } + template + simdjson_inline simd8 shl() const { return simd8(__lasx_xvslli_b(*this, N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 2, "LASX kernel should use two registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint32_t mask1 = uint32_t(mask); + uint32_t mask2 = uint32_t(mask >> 32); + __m256i zcnt = __lasx_xvpcnt_w(__m256i(v4u64{~mask, 0, 0, 0})); + uint64_t zcnt1 = __lasx_xvpickve2gr_wu(zcnt, 0); + uint64_t zcnt2 = __lasx_xvpickve2gr_wu(zcnt, 1); + // There should be a critical value which processes in scaler is faster. 
+ if (zcnt1) + this->chunks[0].compress(mask1, output); + if (zcnt2) + this->chunks[1].compress(mask2, output + zcnt1); + return zcnt1 + zcnt2; + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + } + + simdjson_inline uint64_t to_bitmask() const { + __m256i mask0 = __lasx_xvmskltz_b(this->chunks[0]); + __m256i mask1 = __lasx_xvmskltz_b(this->chunks[1]); + __m256i mask_tmp = __lasx_xvpickve_w(mask0, 4); + __m256i tmp = __lasx_xvpickve_w(mask1, 4); + mask0 = __lasx_xvinsve0_w(mask0, mask1, 1); + mask_tmp = __lasx_xvinsve0_w(mask_tmp, tmp, 1); + return __lasx_xvpickve2gr_du(__lasx_xvpackev_h(mask_tmp, mask0), 0); + } + + simdjson_inline simd8 reduce_or() const { + return this->chunks[0] | this->chunks[1]; + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_SIMD_H +/* end file simdjson/lasx/simd.h */ +/* including simdjson/lasx/stringparsing_defs.h: #include "simdjson/lasx/stringparsing_defs.h" */ +/* begin file simdjson/lasx/stringparsing_defs.h */ +#ifndef SIMDJSON_LASX_STRINGPARSING_DEFS_H +#define SIMDJSON_LASX_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/lasx/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + return { + static_cast((v == '\\').to_bitmask()), // bs_bits + static_cast((v == '"').to_bitmask()), // quote_bits + }; +} + +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_STRINGPARSING_DEFS_H +/* end file simdjson/lasx/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/lasx/begin.h */ +/* including simdjson/generic/ondemand/amalgamated.h for lasx: #include "simdjson/generic/ondemand/amalgamated.h" */ +/* begin file simdjson/generic/ondemand/amalgamated.h for lasx */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H) +#error simdjson/generic/ondemand/dependencies.h must be 
included before simdjson/generic/ondemand/amalgamated.h! +#endif + +// Stuff other things depend on +/* including simdjson/generic/ondemand/base.h for lasx: #include "simdjson/generic/ondemand/base.h" */ +/* begin file simdjson/generic/ondemand/base.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +/** + * A fast, simple, DOM-like interface that parses JSON as you use it. + * + * Designed for maximum speed and a lower memory profile. + */ +namespace ondemand { + +/** Represents the depth of a JSON value (number of nested arrays/objects). */ +using depth_t = int32_t; + +/** @copydoc simdjson::lasx::number_type */ +using number_type = simdjson::lasx::number_type; + +/** @private Position in the JSON buffer indexes */ +using token_position = const uint32_t *; + +class array; +class array_iterator; +class document; +class document_reference; +class document_stream; +class field; +class json_iterator; +enum class json_type; +struct number; +class object; +class object_iterator; +class parser; +class raw_json_string; +class token_iterator; +class value; +class value_iterator; + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_BASE_H +/* end file simdjson/generic/ondemand/base.h for lasx */ +/* including simdjson/generic/ondemand/value_iterator.h for lasx: #include "simdjson/generic/ondemand/value_iterator.h" */ +/* begin file simdjson/generic/ondemand/value_iterator.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped 
(editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +/** + * Iterates through a single JSON value at a particular depth. + * + * Does not keep track of the type of value: provides methods for objects, arrays and scalars and expects + * the caller to call the right ones. + * + * @private This is not intended for external use. + */ +class value_iterator { +protected: + /** The underlying JSON iterator */ + json_iterator *_json_iter{}; + /** The depth of this value */ + depth_t _depth{}; + /** + * The starting token index for this value + */ + token_position _start_position{}; + +public: + simdjson_inline value_iterator() noexcept = default; + + /** + * Denote that we're starting a document. + */ + simdjson_inline void start_document() noexcept; + + /** + * Skips a non-iterated or partially-iterated JSON value, whether it is a scalar, array or object. + * + * Optimized for scalars. + */ + simdjson_warn_unused simdjson_inline error_code skip_child() noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is at the start of the value + */ + simdjson_inline bool at_start() const noexcept; + + /** + * Tell whether the value is open--if the value has not been used, or the array/object is still open. + */ + simdjson_inline bool is_open() const noexcept; + + /** + * Tell whether the value is at an object's first field (just after the {). + */ + simdjson_inline bool at_first_field() const noexcept; + + /** + * Abandon all iteration. 
+ */ + simdjson_inline void abandon() noexcept; + + /** + * Get the child value as a value_iterator. + */ + simdjson_inline value_iterator child_value() const noexcept; + + /** + * Get the depth of this value. + */ + simdjson_inline int32_t depth() const noexcept; + + /** + * Get the JSON type of this value. + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() const noexcept; + + /** + * @addtogroup object Object iteration + * + * Methods to iterate and find object fields. These methods generally *assume* the value is + * actually an object; the caller is responsible for keeping track of that fact. + * + * @{ + */ + + /** + * Start an object iteration. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + */ + simdjson_warn_unused simdjson_inline simdjson_result start_object() noexcept; + /** + * Start an object iteration from the root. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_object() noexcept; + /** + * Checks whether an object could be started from the root. May be called by start_root_object. + * + * @returns SUCCESS if it is possible to safely start an object from the root (document level). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_object() noexcept; + /** + * Start an object iteration after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). 
+ * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; + /** + * Start an object iteration from the root, after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; + + /** + * Moves to the next field in an object. + * + * Looks for , and }. If } is found, the object is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return whether there is another field in the object. + * @error TAPE_ERROR If there is a comma missing between fields. + * @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_field() noexcept; + + /** + * Get the current field's key. + */ + simdjson_warn_unused simdjson_inline simdjson_result field_key() noexcept; + + /** + * Pass the : in the field and move to its value. + */ + simdjson_warn_unused simdjson_inline error_code field_value() noexcept; + + /** + * Find the next field with the given key. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). 
+ */ + simdjson_warn_unused simdjson_inline error_code find_field(const std::string_view key) noexcept; + + /** + * Find the next field with the given key, *without* unescaping. This assumes object order: it + * will not find the field if it was already passed when looking for some *other* field. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_raw(const std::string_view key) noexcept; + + /** + * Find the field with the given key without regard to order, and *without* unescaping. + * + * This is an unordered object lookup: if the field is not found initially, it will cycle around and scan from the beginning. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept; + + /** @} */ + + /** + * @addtogroup array Array iteration + * Methods to iterate over array elements. These methods generally *assume* the value is actually + * an object; the caller is responsible for keeping track of that fact. + * @{ + */ + + /** + * Check for an opening [ and start an array iteration. 
+ * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + */ + simdjson_warn_unused simdjson_inline simdjson_result start_array() noexcept; + /** + * Check for an opening [ and start an array iteration while at the root. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_array() noexcept; + /** + * Checks whether an array could be started from the root. May be called by start_root_array. + * + * @returns SUCCESS if it is possible to safely start an array from the root (document level). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_array() noexcept; + /** + * Start an array iteration, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; + /** + * Start an array iteration from the root, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; + + /** + * Moves to the next element in an array. + * + * Looks for , and ]. 
If ] is found, the array is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return Whether there is another element in the array. + * @error TAPE_ERROR If there is a comma missing between elements. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_element() noexcept; + + /** + * Get a child value iterator. + */ + simdjson_warn_unused simdjson_inline value_iterator child() const noexcept; + + /** @} */ + + /** + * @defgroup scalar Scalar values + * @addtogroup scalar + * @{ + */ + + simdjson_warn_unused simdjson_inline simdjson_result get_string(bool allow_replacement) noexcept; + template + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_bool() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_null() noexcept; + simdjson_warn_unused simdjson_inline bool is_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_integer() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + simdjson_warn_unused simdjson_inline simdjson_result get_root_string(bool 
check_trailing, bool allow_replacement) noexcept; + template + simdjson_warn_unused simdjson_inline error_code get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_wobbly_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_raw_json_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_bool(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline bool is_root_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_integer(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number_type(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; + + simdjson_inline error_code error() const noexcept; + simdjson_inline uint8_t *&string_buf_loc() noexcept; + simdjson_inline const json_iterator &json_iter() const noexcept; + simdjson_inline json_iterator &json_iter() noexcept; + + simdjson_inline void assert_is_valid() const noexcept; + simdjson_inline bool is_valid() const noexcept; + + /** @} 
*/ +protected: + /** + * Restarts an array iteration. + * @returns Whether the array has any elements (returns false for empty). + */ + simdjson_inline simdjson_result reset_array() noexcept; + /** + * Restarts an object iteration. + * @returns Whether the object has any fields (returns false for empty). + */ + simdjson_inline simdjson_result reset_object() noexcept; + /** + * move_at_start(): moves us so that we are pointing at the beginning of + * the container. It updates the index so that at_start() is true and it + * syncs the depth. The user can then create a new container instance. + * + * Usage: used with value::count_elements(). + **/ + simdjson_inline void move_at_start() noexcept; + + /** + * move_at_container_start(): moves us so that we are pointing at the beginning of + * the container so that assert_at_container_start() passes. + * + * Usage: used with reset_array() and reset_object(). + **/ + simdjson_inline void move_at_container_start() noexcept; + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + simdjson_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept; + + simdjson_inline simdjson_result parse_null(const uint8_t *json) const noexcept; + simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; + simdjson_inline const uint8_t *peek_start() const noexcept; + simdjson_inline uint32_t peek_start_length() const noexcept; + simdjson_inline uint32_t peek_root_length() const noexcept; + + /** + * The general idea of the advance_... methods and the peek_* methods + * is that you first peek and check that you have desired type. If you do, + * and only if you do, then you advance. + * + * We used to unconditionally advance. But this made reasoning about our + * current state difficult. + * Suppose you always advance. Look at the 'value' matching the key + * "shadowable" in the following example... 
+ * + * ({"globals":{"a":{"shadowable":[}}}}) + * + * If the user thinks it is a Boolean and asks for it, then we check the '[', + * decide it is not a Boolean, but still move into the next character ('}'). Now + * we are left pointing at '}' right after a '['. And we have not yet reported + * an error, only that we do not have a Boolean. + * + * If, instead, you just stand your ground until it is content that you know, then + * you will only even move beyond the '[' if the user tells you that you have an + * array. So you will be at the '}' character inside the array and, hopefully, you + * will then catch the error because an array cannot start with '}', but the code + * processing Boolean values does not know this. + * + * So the contract is: first call 'peek_...' and then call 'advance_...' only + * if you have determined that it is a type you can handle. + * + * Unfortunately, it makes the code more verbose, longer and maybe more error prone. + */ + + simdjson_inline void advance_scalar(const char *type) noexcept; + simdjson_inline void advance_root_scalar(const char *type) noexcept; + simdjson_inline void advance_non_root_scalar(const char *type) noexcept; + + simdjson_inline const uint8_t *peek_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_root_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; + + + simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; + simdjson_inline error_code end_container() noexcept; + + /** + * Advance to a place expecting a value (increasing depth). + * + * @return The current token (the one left behind). + * @error TAPE_ERROR If the document ended early. 
+ */ + simdjson_inline simdjson_result advance_to_value() noexcept; + + simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; + simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; + + simdjson_inline bool is_at_start() const noexcept; + /** + * is_at_iterator_start() returns true on an array or object after it has just been + * created, whether the instance is empty or not. + * + * Usage: used by array::begin() in debug mode (SIMDJSON_DEVELOPMENT_CHECKS) + */ + simdjson_inline bool is_at_iterator_start() const noexcept; + + /** + * Assuming that we are within an object, this returns true if we + * are pointing at a key. + * + * Usage: the skip_child() method should never be used while we are pointing + * at a key inside an object. + */ + simdjson_inline bool is_at_key() const noexcept; + + inline void assert_at_start() const noexcept; + inline void assert_at_container_start() const noexcept; + inline void assert_at_root() const noexcept; + inline void assert_at_child() const noexcept; + inline void assert_at_next() const noexcept; + inline void assert_at_non_root_start() const noexcept; + + /** Get the starting position of this value */ + simdjson_inline token_position start_position() const noexcept; + + /** @copydoc error_code json_iterator::position() const noexcept; */ + simdjson_inline token_position position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position last_position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position end_position() const noexcept; + /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + friend class document; + friend class object; + friend class array; + friend class value; + friend 
class field; +}; // value_iterator + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public lasx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lasx::ondemand::value_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H +/* end file simdjson/generic/ondemand/value_iterator.h for lasx */ +/* including simdjson/generic/ondemand/value.h for lasx: #include "simdjson/generic/ondemand/value.h" */ +/* begin file simdjson/generic/ondemand/value.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +/** + * An ephemeral JSON value returned during iteration. It is only valid for as long as you do + * not access more data in the JSON document. + */ +class value { +public: + /** + * Create a new invalid value. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline value() noexcept = default; + + /** + * Get this value as the given type. 
+ * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_inline simdjson_result get() noexcept { + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_inline error_code get(T &out) noexcept; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() noexcept; + + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields.
+ * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() noexcept; + + /** + * Cast this JSON value to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + + /** + * Cast this JSON value (inside string) to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64() noexcept; + + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed.
+ * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + + /** + * Attempts to fill the provided std::string reference with the parsed value of the current string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. + * We recommend you avoid allocating an std::string unless you need to. + * + * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. + */ + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + + /** + * Cast this JSON value to a "wobbly" string. + * + * The string may not be a valid UTF-8 string. + * See https://simonsapin.github.io/wtf-8/ + * + * Important: a value should be consumed once. Calling get_wobbly_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false.
+ */ + simdjson_inline simdjson_result get_bool() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null. + * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.). + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() noexcept(false); + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer.
+ */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); +#endif + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + * + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. 
After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * Performance hint: You should only call count_elements() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method on the object instance. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; + /** + * Look up a field by name on an object (order-sensitive). 
+ * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. 
+ * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field were not there when they are not in order). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @return The type of JSON value (json_type::array, json_type::object, json_type::string, + * json_type::number, json_type::boolean, or json_type::null). + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the value is a scalar (string, number, null, Boolean). + * Returns false when it is an array or object.
+ * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + /** + * Checks whether the value is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; + + /** + * Checks whether the value is a negative number. + * + * @returns true if the number is negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the value is an integer number. Note that + * this requires partially parsing the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * Performance note: if you call this function systematically + * before parsing a number, you may have fallen for a performance + * anti-pattern. + * + * @returns true if the number is an integer. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808. + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808.
+ * get_number_type() is number_type::big_integer for integers that do not fit in 64 bits, + * in which case the digit_count is set to the length of the big integer string. + * Otherwise, get_number_type() has value number_type::floating_point_number. + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number. An ondemand::number may + * contain an integer value or a floating-point value; the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * For integers that do not fit in 64 bits, the function returns BIGINT_ERROR error code. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + * + * Performance note: this is designed with performance in mind.
When + * calling 'get_number()', you scan the number string only once, determining + * efficiently the type and storing it in an efficient manner. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. However, if this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view is guaranteed to be + * a non-space token. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". + * - -1.2e-100 + * - true + * - false + * - null + * + * See also value::raw_json(). + */ + simdjson_inline std::string_view raw_json_token() noexcept; + + /** + * Get a string_view pointing at this value in the JSON document. + * If this element is an array or an object, it consumes the array or the object + * and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document. + * If this element is a scalar (string, number, Boolean, null), it returns what + * raw_json_token() would return. + */ + simdjson_inline simdjson_result raw_json() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + simdjson_inline simdjson_result current_location() noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. 
+ */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. + * + * Calling at_pointer() on non-document instances (e.g., arrays and objects) is not + * standardized (by RFC 6901). We provide some experimental support for JSON pointers + * on non-document instances. Yet it is not the case when calling at_pointer on an array + * or an object instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. 
+ * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that are trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view at_path) noexcept; + + +protected: + /** + * Create a value. + */ + simdjson_inline value(const value_iterator &iter) noexcept; + + /** + * Skip this value, allowing iteration to continue. + */ + simdjson_inline void skip() noexcept; + + /** + * Start a value at the current position. + * + * (It should already be started; this is just a self-documentation method.) + */ + static simdjson_inline value start(const value_iterator &iter) noexcept; + + /** + * Resume a value.
+ */ + static simdjson_inline value resume(const value_iterator &iter) noexcept; + + /** + * Get the object, starting or resuming it as necessary + */ + simdjson_inline simdjson_result start_or_resume_object() noexcept; + + // simdjson_inline void log_value(const char *type) const noexcept; + // simdjson_inline void log_error(const char *message) const noexcept; + + value_iterator iter{}; + + friend class document; + friend class array_iterator; + friend class field; + friend class object; + friend struct simdjson_result; + friend struct simdjson_result; + friend class field; +}; + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public lasx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lasx::ondemand::value &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result get_array() noexcept; + simdjson_inline simdjson_result get_object() noexcept; + + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() noexcept; + + 
template simdjson_inline error_code get(T &out) noexcept; + +#if SIMDJSON_EXCEPTIONS + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator lasx::ondemand::array() noexcept(false); + simdjson_inline operator lasx::ondemand::object() noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator lasx::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
+ */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field is not there when they are not in order). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object.
+ */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + */ + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + + /** @copydoc simdjson_inline std::string_view value::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + + /** @copydoc simdjson_inline simdjson_result current_location() noexcept */ + simdjson_inline simdjson_result current_location() noexcept; + /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + +} // namespace simdjson + +#endif 
// SIMDJSON_GENERIC_ONDEMAND_VALUE_H +/* end file simdjson/generic/ondemand/value.h for lasx */ +/* including simdjson/generic/ondemand/logger.h for lasx: #include "simdjson/generic/ondemand/logger.h" */ +/* begin file simdjson/generic/ondemand/logger.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +// Logging should be free unless SIMDJSON_VERBOSE_LOGGING is set. Importantly, it is critical +// that the call to the log functions be side-effect free. Thus, for example, you should not +// create temporary std::string instances. +namespace logger { + +enum class log_level : int32_t { + info = 0, + error = 1 +}; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + +// We do not want these functions to be 'really inlined' since real inlining is +// for performance purposes and if you are using the loggers, you do not care about +// performance (or should not). +static inline void log_headers() noexcept; +// If args are provided, title will be treated as format string +template +static inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +template +static inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... 
args) noexcept; +static inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; + +static inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail="") noexcept; +static inline void log_error(const json_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +static inline void log_event(const value_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const value_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_error(const value_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +} // namespace logger +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_H +/* end file 
simdjson/generic/ondemand/logger.h for lasx */ +/* including simdjson/generic/ondemand/token_iterator.h for lasx: #include "simdjson/generic/ondemand/token_iterator.h" */ +/* begin file simdjson/generic/ondemand/token_iterator.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +/** + * Iterates through JSON tokens (`{` `}` `[` `]` `,` `:` `""` `123` `true` `false` `null`) + * detected by stage 1. + * + * @private This is not intended for external use. + */ +class token_iterator { +public: + /** + * Create a new invalid token_iterator. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline token_iterator() noexcept = default; + simdjson_inline token_iterator(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator &operator=(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator(const token_iterator &other) noexcept = default; + simdjson_inline token_iterator &operator=(const token_iterator &other) noexcept = default; + + /** + * Advance to the next token (returning the current one). + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + /** + * Reports the current offset in bytes from the start of the underlying buffer. + */ + simdjson_inline uint32_t current_offset() const noexcept; + /** + * Get the JSON text for a given token (relative). 
+ * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. + * + */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a root token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token (start of the document). + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; + /** + * Return the current index. + */ + simdjson_inline token_position position() const noexcept; + /** + * Reset to a previously saved index. + */ + simdjson_inline void set_position(token_position target_position) noexcept; + + // NOTE: we don't support a full C++ iterator interface, because we expect people to make + // different calls to advance the iterator based on *their own* state. 
+ + simdjson_inline bool operator==(const token_iterator &other) const noexcept; + simdjson_inline bool operator!=(const token_iterator &other) const noexcept; + simdjson_inline bool operator>(const token_iterator &other) const noexcept; + simdjson_inline bool operator>=(const token_iterator &other) const noexcept; + simdjson_inline bool operator<(const token_iterator &other) const noexcept; + simdjson_inline bool operator<=(const token_iterator &other) const noexcept; + +protected: + simdjson_inline token_iterator(const uint8_t *buf, token_position position) noexcept; + + /** + * Get the index of the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_index(int32_t delta=0) const noexcept; + /** + * Get the index of the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. + * + */ + simdjson_inline uint32_t peek_index(token_position position) const noexcept; + + const uint8_t *buf{}; + token_position _position{}; + + friend class json_iterator; + friend class value_iterator; + friend class object; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... 
args) noexcept; +}; + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public lasx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lasx::ondemand::token_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H +/* end file simdjson/generic/ondemand/token_iterator.h for lasx */ +/* including simdjson/generic/ondemand/json_iterator.h for lasx: #include "simdjson/generic/ondemand/json_iterator.h" */ +/* begin file simdjson/generic/ondemand/json_iterator.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +/** + * Iterates through JSON tokens, keeping track of depth and string buffer. + * + * @private This is not intended for external use. + */ +class json_iterator { +protected: + token_iterator token{}; + ondemand::parser *parser{}; + /** + * Next free location in the string buffer. + * + * Used by raw_json_string::unescape() to have a place to unescape strings to. + */ + uint8_t *_string_buf_loc{}; + /** + * JSON error, if there is one. 
+ * + * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. + * + * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first + * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If + * this is not elided, we should make sure it's at least not using up a register. Failing that, + * we should store it in document so there's only one of them. + */ + error_code error{SUCCESS}; + /** + * Depth of the current token in the JSON. + * + * - 0 = finished with document + * - 1 = document root value (could be [ or {, not yet known) + * - 2 = , or } inside root array/object + * - 3 = key or value inside root array/object. + */ + depth_t _depth{}; + /** + * Beginning of the document indexes. + * Normally we have root == parser->implementation->structural_indexes.get() + * but this may differ, especially in streaming mode (where we have several + * documents); + */ + token_position _root{}; + /** + * Normally, a json_iterator operates over a single document, but in + * some cases, we may have a stream of documents. This attribute is meant + * as meta-data: the json_iterator works the same irrespective of the + * value of this attribute. + */ + bool _streaming{false}; + +public: + simdjson_inline json_iterator() noexcept = default; + simdjson_inline json_iterator(json_iterator &&other) noexcept; + simdjson_inline json_iterator &operator=(json_iterator &&other) noexcept; + simdjson_inline explicit json_iterator(const json_iterator &other) noexcept = default; + simdjson_inline json_iterator &operator=(const json_iterator &other) noexcept = default; + /** + * Skips a JSON value, whether it is a scalar, array or object. 
+ */ + simdjson_warn_unused simdjson_inline error_code skip_child(depth_t parent_depth) noexcept; + + /** + * Tell whether the iterator is still at the start + */ + simdjson_inline bool at_root() const noexcept; + + /** + * Tell whether we should be expected to run in streaming + * mode (iterating over many documents). It is pure metadata + * that does not affect how the iterator works. It is used by + * start_root_array() and start_root_object(). + */ + simdjson_inline bool streaming() const noexcept; + + /** + * Get the root value iterator + */ + simdjson_inline token_position root_position() const noexcept; + /** + * Assert that we are at the document depth (== 1) + */ + simdjson_inline void assert_at_document_depth() const noexcept; + /** + * Assert that we are at the root of the document + */ + simdjson_inline void assert_at_root() const noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is live (has not been moved). + */ + simdjson_inline bool is_alive() const noexcept; + + /** + * Abandon this iterator, setting depth to 0 (as if the document is finished). + */ + simdjson_inline void abandon() noexcept; + + /** + * Advance the current token without modifying depth. + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + + /** + * Returns true if there is a single token in the index (i.e., it is + * a JSON with a scalar value such as a single number). + * + * @return whether there is a single token + */ + simdjson_inline bool is_single_token() const noexcept; + + /** + * Assert that there are at least the given number of tokens left. + * + * Has no effect in release builds. + */ + simdjson_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; + /** + * Assert that the given position addresses an actual token (is within bounds). + * + * Has no effect in release builds. 
+ */ + simdjson_inline void assert_valid_position(token_position position) const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + /** + * Get a pointer to the current location in the input buffer. + * + * This is not null-terminated; it is a view into the JSON. + * + * You may be pointing outside of the input buffer: it is not generally + * safe to dereference this pointer. + */ + simdjson_inline const uint8_t *unsafe_pointer() const noexcept; + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token to retrieve. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ... + */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the current root token. 
+ * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; + /** + * Get the JSON text for the last token in the document. + * + * This is not null-terminated; it is a view into the JSON. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ... + */ + simdjson_inline const uint8_t *peek_last() const noexcept; + + /** + * Ascend one level. + * + * Validates that the depth - 1 == parent_depth. + * + * @param parent_depth the expected parent depth. + */ + simdjson_inline void ascend_to(depth_t parent_depth) noexcept; + + /** + * Descend one level. + * + * Validates that the new depth == child_depth. + * + * @param child_depth the expected child depth. + */ + simdjson_inline void descend_to(depth_t child_depth) noexcept; + simdjson_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; + + /** + * Get current depth. + */ + simdjson_inline depth_t depth() const noexcept; + + /** + * Get current (writeable) location in the string buffer. + */ + simdjson_inline uint8_t *&string_buf_loc() noexcept; + + /** + * Report an unrecoverable error, preventing further iteration. + * + * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. + */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + /** + * Log error, but don't stop iteration. + * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. 
+ */ + simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; + + /** + * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with + * N bytes of capacity. It will return false if N is too small (smaller than max_len) or if it is zero. + * The buffer (tmpbuf) is padded with space characters. + */ + simdjson_warn_unused simdjson_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept; + + simdjson_inline token_position position() const noexcept; + /** + * Write the raw_json_string to the string buffer and return a string_view. + * Each raw_json_string should be unescaped once, or else the string buffer might + * overflow. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, bool allow_replacement) noexcept; + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in) noexcept; + + simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; + + simdjson_inline error_code consume_character(char c) noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + simdjson_inline token_position start_position(depth_t depth) const noexcept; + simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; +#endif + + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() const noexcept; + + /** + * Updates this json iterator so that it is back at the beginning of the document, + * as if it had just been created. + */ + inline void rewind() noexcept; + /** + * This checks whether the {,},[,] are balanced so that the document + * ends with proper zero depth. This requires scanning the whole document + * and it may be expensive. It is expected that it will be rarely called.
+ * It does not attempt to match { with } and [ with ]. + */ + inline bool balanced() const noexcept; +protected: + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser, bool streaming) noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + /// The last token before the end + simdjson_inline token_position last_position() const noexcept; + /// The token *at* the end. This points at gibberish and should only be used for comparison. + simdjson_inline token_position end_position() const noexcept; + /// The end of the buffer. + simdjson_inline token_position end() const noexcept; + + friend class document; + friend class document_stream; + friend class object; + friend class array; + friend class value; + friend class raw_json_string; + friend class parser; + friend class value_iterator; + friend class field; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... 
args) noexcept; +}; // json_iterator + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public lasx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lasx::ondemand::json_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H +/* end file simdjson/generic/ondemand/json_iterator.h for lasx */ +/* including simdjson/generic/ondemand/json_type.h for lasx: #include "simdjson/generic/ondemand/json_type.h" */ +/* begin file simdjson/generic/ondemand/json_type.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +/** + * The type of a JSON value. + */ +enum class json_type { + // Start at 1 to catch uninitialized / default values more easily + array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) + object, ///< A JSON object ( { "a": 1, "b": 2, ... } ) + number, ///< A JSON number ( 1 or -2.3 or 4.5e6 ...) + string, ///< A JSON string ( "a" or "hello world\n" ...) + boolean, ///< A JSON boolean (true or false) + null ///< A JSON null (null) +}; + +/** + * A type representing a JSON number. + * The design of the struct is deliberately straight-forward.
All + * functions return standard values with no error check. + */ +struct number { + + /** + * return the automatically determined type of + * the number: number_type::floating_point_number, + * number_type::signed_integer or number_type::unsigned_integer. + * + * enum class number_type { + * floating_point_number=1, /// a binary64 number + * signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + * unsigned_integer /// a positive integer larger or equal to 1<<63 + * }; + */ + simdjson_inline ondemand::number_type get_number_type() const noexcept; + /** + * return true if the automatically determined type of + * the number is number_type::unsigned_integer. + */ + simdjson_inline bool is_uint64() const noexcept; + /** + * return the value as a uint64_t, only valid if is_uint64() is true. + */ + simdjson_inline uint64_t get_uint64() const noexcept; + simdjson_inline operator uint64_t() const noexcept; + + /** + * return true if the automatically determined type of + * the number is number_type::signed_integer. + */ + simdjson_inline bool is_int64() const noexcept; + /** + * return the value as an int64_t, only valid if is_int64() is true. + */ + simdjson_inline int64_t get_int64() const noexcept; + simdjson_inline operator int64_t() const noexcept; + + + /** + * return true if the automatically determined type of + * the number is number_type::floating_point_number. + */ + simdjson_inline bool is_double() const noexcept; + /** + * return the value as a double, only valid if is_double() is true. + */ + simdjson_inline double get_double() const noexcept; + simdjson_inline operator double() const noexcept; + + /** + * Convert the number to a double. Though it always succeeds, the conversion + * may be lossy if the number cannot be represented exactly.
+ */ + simdjson_inline double as_double() const noexcept; + + +protected: + /** + * The next block of declaration is designed so that we can call the number parsing + * functions on a number type. They are protected and should never be used outside + * of the core simdjson library. + */ + friend class value_iterator; + template + friend error_code numberparsing::write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer); + template + friend error_code numberparsing::parse_number(const uint8_t *const src, W &writer); + /** Store a signed 64-bit value to the number. */ + simdjson_inline void append_s64(int64_t value) noexcept; + /** Store an unsigned 64-bit value to the number. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + /** Store a double value to the number. */ + simdjson_inline void append_double(double value) noexcept; + /** Specifies that the value is a double, but leave it undefined. */ + simdjson_inline void skip_double() noexcept; + /** + * End of friend declarations. + */ + + /** + * Our attributes are a union type (size = 64 bits) + * followed by a type indicator. + */ + union { + double floating_point_number; + int64_t signed_integer; + uint64_t unsigned_integer; + } payload{0}; + number_type type{number_type::signed_integer}; +}; + +/** + * Write the JSON type to the output stream + * + * @param out The output stream. + * @param type The json_type. + */ +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept; + +#if SIMDJSON_EXCEPTIONS +/** + * Send JSON type to an output stream. + * + * @param out The output stream. + * @param type The json_type. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown). 
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false); +#endif + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public lasx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lasx::ondemand::json_type &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H +/* end file simdjson/generic/ondemand/json_type.h for lasx */ +/* including simdjson/generic/ondemand/raw_json_string.h for lasx: #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +/** + * A string escaped per JSON rules, terminated with quote ("). They are used to represent + * unescaped keys inside JSON documents. + * + * (In other words, a pointer to the beginning of a string, just after the start quote, inside a + * JSON file.) + * + * This class is deliberately simplistic and has little functionality. You can + * compare a raw_json_string instance with an unescaped C string, but + * that is nearly all you can do. 
+ * + * The raw_json_string is unescaped. If you wish to write an unescaped version of it to your own + * buffer, you may do so using the parser.unescape(string, buff) method, using an ondemand::parser + * instance. Doing so requires you to have a sufficiently large buffer. + * + * The raw_json_string instances originate typically from field instance which in turn represent + * key-value pairs from object instances. From a field instance, you get the raw_json_string + * instance by calling key(). You can, if you want a more usable string_view instance, call + * the unescaped_key() method on the field instance. You may also create a raw_json_string from + * any other string value, with the value.get_raw_json_string() method. Again, you can get + * a more usable string_view instance by calling get_string(). + * + */ +class raw_json_string { +public: + /** + * Create a new invalid raw_json_string. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline raw_json_string() noexcept = default; + + /** + * Create a new invalid raw_json_string pointed at the given location in the JSON. + * + * The given location must be just *after* the beginning quote (") in the JSON file. + * + * It *must* be terminated by a ", and be a valid JSON string. + */ + simdjson_inline raw_json_string(const uint8_t * _buf) noexcept; + /** + * Get the raw pointer to the beginning of the string in the JSON (just after the "). + * + * It is possible for this function to return a null pointer if the instance + * has outlived its existence. + */ + simdjson_inline const char * raw() const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done) on target.size() characters, + * and if the raw_json_string instance has a quote character at byte index target.size(). + * We never read more than length + 1 bytes in the raw_json_string instance. 
+ * If length is smaller than target.size(), this will return false. + * + * The std::string_view instance may contain any characters. However, the caller + * is responsible for setting length so that length bytes may be read in the + * raw_json_string. + * + * Performance: the comparison may be done using memcmp which may be efficient + * for long strings. + */ + simdjson_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The std::string_view instance should not contain unescaped quote characters: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * Performance: the comparison is done byte-by-byte which might be inefficient for + * long strings. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... + * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The provided C string should not contain an unescaped quote character: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... 
+ * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(const char* target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(const char* target) const noexcept; + + /** + * Returns true if target is free from unescaped quote. If target is known at + * compile-time, we might expect the computation to happen at compile time with + * many compilers (not all!). + */ + static simdjson_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; + static simdjson_inline bool is_free_from_unescaped_quote(const char* target) noexcept; + +private: + + + /** + * This will set the inner pointer to zero, effectively making + * this instance unusable. + */ + simdjson_inline void consume() noexcept { buf = nullptr; } + + /** + * Checks whether the inner pointer is non-null and thus usable. + */ + simdjson_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result will be a valid UTF-8. + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + * @param allow_replacement Whether we allow replacement of invalid surrogate pairs. 
+ */ + simdjson_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter, bool allow_replacement) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result may not be a valid UTF-8. https://simonsapin.github.io/wtf-8/ + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + */ + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(json_iterator &iter) const noexcept; + const uint8_t * buf{}; + friend class object; + friend class field; + friend class parser; + friend struct simdjson_result; +}; + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; + +/** + * Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible + * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings. 
+ */ +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept; +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept; + + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public lasx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lasx::ondemand::raw_json_string &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private + + simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape(lasx::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(lasx::ondemand::json_iterator &iter) const noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H +/* end file simdjson/generic/ondemand/raw_json_string.h for lasx */ +/* including simdjson/generic/ondemand/parser.h for lasx: #include "simdjson/generic/ondemand/parser.h" */ +/* begin file simdjson/generic/ondemand/parser.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* 
amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace lasx { +namespace ondemand { + +/** + * The default batch size for document_stream instances for this On Demand kernel. + * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * in the future. + */ +static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; +/** + * Some adversary might try to set the batch size to 0 or 1, which might cause problems. + * We set a minimum of 32B since anything else is highly likely to be an error. In practice, + * most users will want a much larger batch size. + * + * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON + * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. + */ +static constexpr size_t MINIMAL_BATCH_SIZE = 32; + +/** + * A JSON fragment iterator. + * + * This holds the actual iterator as well as the buffer for writing strings. + */ +class parser { +public: + /** + * Create a JSON parser. + * + * The new parser will have zero capacity. + */ + inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; + + inline parser(parser &&other) noexcept = default; + simdjson_inline parser(const parser &other) = delete; + simdjson_inline parser &operator=(const parser &other) = delete; + simdjson_inline parser &operator=(parser &&other) noexcept = default; + + /** Deallocate the JSON parser. */ + inline ~parser() noexcept = default; + + /** + * Start iterating an on-demand JSON document. + * + * ondemand::parser parser; + * document doc = parser.iterate(json); + * + * It is expected that the content is a valid UTF-8 file, containing a valid JSON document. + * Otherwise the iterate method may return an error. In particular, the whole input should be + * valid: we do not attempt to tolerate incorrect content either before or after a JSON + * document. 
If there is a UTF-8 BOM, the parser skips it. + * + * ### IMPORTANT: Validate what you use + * + * Calling iterate on an invalid JSON document may not immediately trigger an error. The call to + * iterate does not parse and validate the whole document. + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call parse() again or destroy the parser. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * @param len The length of the JSON. + * @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). + * + * @return The document, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document.
+ */ + simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_result iterate_allow_incomplete_json(padded_string_view json) & noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const uint8_t *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(padded_string &&json) & noexcept = delete; + + /** + * @private + * + * Start iterating an on-demand JSON document. 
+ * + * ondemand::parser parser; + * json_iterator doc = parser.iterate_raw(json); + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call parse() again or destroy the parser. + * + * The ondemand::document instance holds the iterator. The document must remain in scope + * while you are accessing instances of ondemand::value, ondemand::object, ondemand::array. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * + * @return The iterator, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document. + */ + simdjson_warn_unused simdjson_result iterate_raw(padded_string_view json) & noexcept; + + + /** + * Parse a buffer containing many JSON documents.
+ * + * auto json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"_padded; + * ondemand::parser parser; + * ondemand::document_stream docs = parser.iterate_many(json); + * for (auto & doc : docs) { + * std::cout << doc["foo"] << std::endl; + * } + * // Prints 1 2 3 + * + * No copy of the input buffer is made. + * + * The function is lazy: it may be that no more than one JSON document at a time is parsed. + * + * The caller is responsible for ensuring that the input string data remains unchanged and is + * not deleted during the loop. + * + * ### Format + * + * The buffer must contain a series of one or more JSON documents, concatenated into a single + * buffer, separated by ASCII whitespace. It effectively parses until it has a fully valid document, + * then starts parsing the next document at that point. (It does this with more parallelism and + * lookahead than you might think, though.) + * + * Documents that consist of an object or array may omit the whitespace between them, concatenating + * with no separator. Documents that consist of a single primitive (i.e. documents that are not + * arrays or objects) MUST be separated with ASCII whitespace. + * + * The characters inside a JSON document, and between JSON documents, must be valid Unicode (UTF-8). + * If there is a UTF-8 BOM, the parser skips it. + * + * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. + * Setting batch_size to excessively large or excessively small values may negatively impact + * performance. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings.
+ * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * + * ### Parser Capacity + * + * If the parser's current capacity is less than batch_size, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param buf The concatenated JSON to parse. + * @param len The length of the concatenated JSON. + * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet + * spot is cache-related: small enough to fit in cache, yet big enough to + * parse as many documents as possible in one tight loop. + * Defaults to 1MB (DEFAULT_BATCH_SIZE), which has been a reasonable sweet spot in our tests. + * @param allow_comma_separated (defaults to false) This allows a mode where the documents are + * separated by commas instead of whitespace. It comes with a performance + * penalty because the entire document is indexed at once (and the document must be + * less than 4 GB), and there is no multithreading. In this mode, the batch_size parameter + * is effectively ignored, as it is set to at least the document size. + * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: + * - MEMALLOC if the parser does not have enough capacity and memory allocation fails + * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. + * - other json errors if parsing fails. You should not rely on these errors to always be the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
+ */ + inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + + /** @private We do not want to allow implicit conversion from C string to std::string. */ + simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; + + /** The capacity of this parser (the largest document it can process). */ + simdjson_inline size_t capacity() const noexcept; + /** The maximum capacity of this parser (the largest document it is allowed to process). */ + simdjson_inline size_t max_capacity() const noexcept; + simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; + /** + * The maximum depth of this parser (the most deeply nested objects and arrays it can process). + * This parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired.
+ */ + simdjson_inline size_t max_depth() const noexcept; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * The max_depth parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return The error, if there is one. + */ + simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept; + + #ifdef SIMDJSON_THREADS_ENABLED + /** + * The parser instance can use threads when they are available to speed up some + * operations. It is enabled by default. Changing this attribute will change the + * behavior of the parser for future operations. + */ + bool threaded{true}; + #endif + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result must be valid UTF-8. + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. 
+ * @param allow_replacement Whether we allow a replacement if the input string contains unmatched surrogate pairs. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement = false) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result may not be valid UTF-8. See https://simonsapin.github.io/wtf-8/ + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. 
+ */ + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; + +private: + /** @private [for benchmarking access] The implementation to use */ + std::unique_ptr implementation{}; + size_t _capacity{0}; + size_t _max_capacity; + size_t _max_depth{DEFAULT_MAX_DEPTH}; + std::unique_ptr string_buf{}; +#if SIMDJSON_DEVELOPMENT_CHECKS + std::unique_ptr start_positions{}; +#endif + + friend class json_iterator; + friend class document_stream; +}; + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public lasx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lasx::ondemand::parser &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H +/* end file simdjson/generic/ondemand/parser.h for lasx */ + +// All other declarations +/* including simdjson/generic/ondemand/array.h for lasx: #include "simdjson/generic/ondemand/array.h" */ +/* begin file simdjson/generic/ondemand/array.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +/** + * A forward-only JSON array. + */ +class array { +public: + /** + * Create a new invalid array. 
+ * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline array() noexcept = default; + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an array is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the beginning of the array and checks whether the + * array is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result is_empty() & noexcept; + /** + * Reset the iterator so that we are pointing back at the + * beginning of the array. You should still consume values only once even if you + * can iterate through the array more than once. If you unescape a string + * within the array more than once, you have unsafe code. 
Note that rewinding + * an array means that you may need to reparse it anew: it is not a free + * operation. + * + * @returns true if the array contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/0/foo/a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an array + * instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. 
We only support + * JSONPath queries that are trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Consumes the array and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + + /** + * Get the value at the given index. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; +protected: + /** + * Go to the end of the array, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + + /** + * Begin array iteration. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + */ + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + /** + * Begin array iteration from the root. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + * @error TAPE_ERROR if there is no closing ] at the end of the document.
+ */ + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + /** + * Begin array iteration. + * + * This version of the method should be called after the initial [ has been verified, and is + * intended for use by switch statements that check the type of a value. + * + * @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array. + */ + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + + /** + * Create an array at the given iterator (internal array creation). Call array::start() or array::started() instead of this. + * + * @param iter The iterator. Must either be at the start of the first element with iter.is_alive() + * == true, or past the [] with is_alive() == false if the array is empty. Will be *moved* + * into the resulting array. + */ + simdjson_inline array(const value_iterator &iter) noexcept; + + /** + * Iterator marking current position. + * + * iter.is_alive() == false indicates iteration is complete.
+ */ + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; + friend struct simdjson_result; + friend class array_iterator; +}; + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public lasx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lasx::ondemand::array &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + inline simdjson_result count_elements() & noexcept; + inline simdjson_result is_empty() & noexcept; + inline simdjson_result reset() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_H +/* end file simdjson/generic/ondemand/array.h for lasx */ +/* including simdjson/generic/ondemand/array_iterator.h for lasx: #include "simdjson/generic/ondemand/array_iterator.h" */ +/* begin file simdjson/generic/ondemand/array_iterator.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* 
amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace lasx { +namespace ondemand { + +/** + * A forward-only JSON array. + * + * This is an input_iterator, meaning: + * - It is forward-only + * - * must be called exactly once per element. + * - ++ must be called exactly once in between each * (*, ++, *, ++, * ...) + */ +class array_iterator { +public: + /** Create a new, invalid array iterator. */ + simdjson_inline array_iterator() noexcept = default; + + // + // Iterator interface + // + + /** + * Get the current element. + * + * Part of the std::iterator interface. + */ + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + /** + * Check if we are at the end of the JSON. + * + * Part of the std::iterator interface. + * + * @return true if there are no more elements in the JSON array. + */ + simdjson_inline bool operator==(const array_iterator &) const noexcept; + /** + * Check if there are more elements in the JSON array. + * + * Part of the std::iterator interface. + * + * @return true if there are more elements in the JSON array. + */ + simdjson_inline bool operator!=(const array_iterator &) const noexcept; + /** + * Move to the next element. + * + * Part of the std::iterator interface. 
+ */ + simdjson_inline array_iterator &operator++() noexcept; + +private: + value_iterator iter{}; + + simdjson_inline array_iterator(const value_iterator &iter) noexcept; + + friend class array; + friend class value; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public lasx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lasx::ondemand::array_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H +/* end file simdjson/generic/ondemand/array_iterator.h for lasx */ +/* including simdjson/generic/ondemand/document.h for lasx: #include "simdjson/generic/ondemand/document.h" */ +/* begin file simdjson/generic/ondemand/document.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +/** + * A JSON document. It holds a json_iterator instance. 
+ * + * Used by tokens to get text, and string buffer location. + * + * You must keep the document around during iteration. + */ +class document { +public: + /** + * Create a new invalid document. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline document() noexcept = default; + simdjson_inline document(const document &other) noexcept = delete; // pass your documents by reference, not by copy + simdjson_inline document(document &&other) noexcept = default; + simdjson_inline document &operator=(const document &other) noexcept = delete; + simdjson_inline document &operator=(document &&other) noexcept = default; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() & noexcept; + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() & noexcept; + /** + * Cast this JSON value to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + /** + * Cast this JSON value (inside string) to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64() noexcept; + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer.
+ * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: Calling get_string() twice on the same document is an error. + * + * @param Whether to allow a replacement character for unmatched surrogate pairs. + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + /** + * Attempts to fill the provided std::string reference with the parsed value of the current string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. + * We recommend you avoid allocating an std::string unless you need to. + * + * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. + */ + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + /** + * Cast this JSON value to a string. + * + * The string is not guaranteed to be valid UTF-8. 
See https://simonsapin.github.io/wtf-8/ + * + * Important: Calling get_wobbly_string() twice on the same document is an error. + * + * @returns A UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_inline simdjson_result get_bool() noexcept; + /** + * Cast this JSON value to a value when the document is an object or an array. + * + * You must not have begun iterating through the object or array. When + * SIMDJSON_DEVELOPMENT_CHECKS is set to 1 (which is the case when building in Debug mode + * by default), and you have already begun iterating, + * you will get an OUT_OF_ORDER_ITERATION error. If you have begun iterating, you can use + * rewind() to reset the document to its initial state before calling this method. + * + * @returns A value if the document is a JSON array or object. + * @returns SCALAR_DOCUMENT_AS_VALUE error if the document is a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result get_value() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null.
+ * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_inline simdjson_result get() & noexcept { + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + } + /** @overload template simdjson_result get() & noexcept */ + template simdjson_inline simdjson_result get() && noexcept { + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." 
+ " You may also add support for custom types, see our documentation."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value + * + * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_inline error_code get(T &out) & noexcept; + /** @overload template error_code get(T &out) & noexcept */ + template simdjson_inline error_code get(T &out) && noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.) + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() noexcept(false); + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() & noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() & noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A signed 64-bit integer. 
+ * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); + /** + * Cast this JSON value to a value when the document is an object or an array. + * + * You must not have begun iterating through the object or array. When + * SIMDJSON_DEVELOPMENT_CHECKS is defined, and you have already begun iterating, + * you will get an OUT_OF_ORDER_ITERATION error. 
If you have begun iterating, you can use + * rewind() to reset the document to its initial state before calling this method. + * + * @returns A value if the document is a JSON array or object. + * @exception SCALAR_DOCUMENT_AS_VALUE error if the document is a scalar (see is_scalar() function). + */ + simdjson_inline operator value() noexcept(false); +#endif + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is rewound to its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is rewound to its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call.
+ * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) & noexcept; + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to + * a key a single time. Doing object["mykey"].to_string()and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
+ */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field was not there when they are not in order). + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. 
+ * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the document is a scalar (string, number, null, Boolean). + * Returns false when it is an array or object. + * + * @returns true if the type is string, number, null, or Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + + /** + * Checks whether the document is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; + + /** + * Checks whether the document is a negative number.
+ * + * @returns true if the number is negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the document is an integer number. Note that + * this requires partially parsing the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * @returns true if the number is an integer. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808 and no larger than 18446744073709551615. + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808 and greater or equal to -9223372036854775808. + * get_number_type() is number_type::big_integer if we have an integer outside + * of those ranges (either larger than 18446744073709551615 or smaller than -9223372036854775808). + * Otherwise, get_number_type() has value number_type::floating_point_number + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number.
An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. If this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view may be the padded buffer. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". 
+ * - -1.2e-100 + * - true + * - false + * - null + */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + /** + * Reset the iterator inside the document instance so we are pointing back at the + * beginning of the document, as if it had just been created. It invalidates all + * values, objects and arrays that you have created so far (including unescaped strings). + */ + inline void rewind() noexcept; + /** + * Returns debugging information. + */ + inline std::string to_debug_string() noexcept; + /** + * Some unrecoverable error conditions may render the document instance unusable. + * The is_alive() method returns true when the document is still suitable. + */ + inline bool is_alive() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() const noexcept; + + /** + * Returns true if this document has been fully parsed. + * If you have consumed the whole document and at_end() returns + * false, then there may be trailing content. + */ + inline bool at_end() const noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. 
+ * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/\\u00E9") == 123 + * doc.at_pointer((const char*)u8"/\u00E9") returns an error (NO_SUCH_FIELD) + * + * Note that at_pointer() automatically calls rewind between each call. Thus + * all values, objects and arrays that you have created so far (including unescaped strings) + * are invalidated. After calling at_pointer, you need to consume the result: string values + * should be stored in your own variables, arrays should be decoded and stored in your own array-like + * structures and so forth. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression.
We only support + * JSONPath queries that are trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_path(".\\u00E9") == 123 + * doc.at_path((const char*)u8".\u00E9") returns an error (NO_SUCH_FIELD) + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Consumes the document and returns a string_view instance corresponding to the + * document as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; +protected: + /** + * Consumes the document.
+ */ + simdjson_inline error_code consume() noexcept; + + simdjson_inline document(ondemand::json_iterator &&iter) noexcept; + simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; + + simdjson_inline value_iterator resume_value_iterator() noexcept; + simdjson_inline value_iterator get_root_value_iterator() noexcept; + simdjson_inline simdjson_result start_or_resume_object() noexcept; + static simdjson_inline document start(ondemand::json_iterator &&iter) noexcept; + + // + // Fields + // + json_iterator iter{}; ///< Current position in the document + static constexpr depth_t DOCUMENT_DEPTH = 0; ///< document depth is always 0 + + friend class array_iterator; + friend class value; + friend class ondemand::parser; + friend class object; + friend class array; + friend class field; + friend class token; + friend class document_stream; + friend class document_reference; +}; + + +/** + * A document_reference is a thin wrapper around a document reference instance. + */ +class document_reference { +public: + simdjson_inline document_reference() noexcept; + simdjson_inline document_reference(document &d) noexcept; + simdjson_inline document_reference(const document_reference &other) noexcept = default; + simdjson_inline document_reference& operator=(const document_reference &other) noexcept = default; + simdjson_inline void rewind() noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code 
get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + + simdjson_inline simdjson_result is_null() noexcept; + template simdjson_inline simdjson_result get() & noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + simdjson_inline operator document&() const noexcept; +#if SIMDJSON_EXCEPTIONS + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator array() & noexcept(false); + simdjson_inline operator object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + + simdjson_inline simdjson_result type() noexcept; + simdjson_inline 
simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + +private: + document *doc{nullptr}; +}; +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public lasx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lasx::ondemand::document &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + 
simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() & noexcept; + template simdjson_inline simdjson_result get() && noexcept; + + template simdjson_inline error_code get(T &out) & noexcept; + template simdjson_inline error_code get(T &out) && noexcept; +#if SIMDJSON_EXCEPTIONS + template ::value == false>::type> + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator lasx::ondemand::array() & noexcept(false); + simdjson_inline operator lasx::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator lasx::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator lasx::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + 
simdjson_inline simdjson_result is_string() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool at_end() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + + +} // namespace simdjson + + + +namespace simdjson { + +template<> +struct simdjson_result : public lasx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lasx::ondemand::document_reference value, error_code error) noexcept; + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result 
get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; +#if SIMDJSON_EXCEPTIONS + template ::value == false>::type> + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator lasx::ondemand::array() & noexcept(false); + simdjson_inline operator lasx::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator lasx::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator lasx::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; 
+ simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H +/* end file simdjson/generic/ondemand/document.h for lasx */ +/* including simdjson/generic/ondemand/document_stream.h for lasx: #include "simdjson/generic/ondemand/document_stream.h" */ +/* begin file simdjson/generic/ondemand/document_stream.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#ifdef SIMDJSON_THREADS_ENABLED +#include +#include +#include +#endif + +namespace simdjson { +namespace lasx { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED +/** @private Custom worker class **/ +struct stage1_worker { + stage1_worker() noexcept = default; + stage1_worker(const stage1_worker&) = delete; + stage1_worker(stage1_worker&&) = delete; + stage1_worker operator=(const stage1_worker&) = delete; + ~stage1_worker(); + /** + * We only start the thread when it is needed, not at object 
construction, this may throw. + * You should only call this once. + **/ + void start_thread(); + /** + * Start a stage 1 job. You should first call 'run', then 'finish'. + * You must call start_thread once before. + */ + void run(document_stream * ds, parser * stage1, size_t next_batch_start); + /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ + void finish(); + +private: + + /** + * Normally, we would never stop the thread. But we do in the destructor. + * This function is only safe assuming that you are not waiting for results. You + * should have called run, then finish, and be done. + **/ + void stop_thread(); + + std::thread thread{}; + /** These three variables define the work done by the thread. **/ + ondemand::parser * stage1_thread_parser{}; + size_t _next_batch_start{}; + document_stream * owner{}; + /** + * We have two state variables. This could be streamlined to one variable in the future but + * we use two for clarity. + */ + bool has_work{false}; + bool can_work{true}; + + /** + * We lock using a mutex. + */ + std::mutex locking_mutex{}; + std::condition_variable cond_var{}; + + friend class document_stream; +}; +#endif // SIMDJSON_THREADS_ENABLED + +/** + * A forward-only stream of documents. + * + * Produced by parser::iterate_many. + * + */ +class document_stream { +public: + /** + * Construct an uninitialized document_stream. + * + * ```c++ + * document_stream docs; + * auto error = parser.iterate_many(json).get(docs); + * ``` + */ + simdjson_inline document_stream() noexcept; + /** Move one document_stream to another. */ + simdjson_inline document_stream(document_stream &&other) noexcept = default; + /** Move one document_stream to another. */ + simdjson_inline document_stream &operator=(document_stream &&other) noexcept = default; + + simdjson_inline ~document_stream() noexcept; + + /** + * Returns the input size in bytes. 
+ */ + inline size_t size_in_bytes() const noexcept; + + /** + * After iterating through the stream, this method + * returns the number of bytes that were not parsed at the end + * of the stream. If truncated_bytes() differs from zero, + * then the input was truncated, possibly because incomplete JSON + * documents were found at the end of the stream. You + * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). + * + * You should only call truncated_bytes() after streaming through all + * documents, like so: + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto & doc : stream) { + * // do something with doc + * } + * size_t truncated = stream.truncated_bytes(); + * + */ + inline size_t truncated_bytes() const noexcept; + + class iterator { + public: + using value_type = simdjson_result; + using reference = simdjson_result; + using pointer = void; + using difference_type = std::ptrdiff_t; + using iterator_category = std::input_iterator_tag; + + /** + * Default constructor. + */ + simdjson_inline iterator() noexcept; + /** + * Get the current document (or error). + */ + simdjson_inline reference operator*() noexcept; + /** + * Advance to the next document (prefix). + */ + inline iterator& operator++() noexcept; + /** + * Check if we're at the end yet. + * @param other the end iterator to compare to. + */ + simdjson_inline bool operator!=(const iterator &other) const noexcept; + /** + * @private + * + * Gives the current index in the input document in bytes. + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * auto doc = *i; + * size_t index = i.current_index(); + * } + * + * This function (current_index()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier.
+ */ + simdjson_inline size_t current_index() const noexcept; + + /** + * @private + * + * Gives a view of the current document at the current position. + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * std::string_view v = i.source(); + * } + * + * The returned string_view instance is simply a map to the (unparsed) + * source string: it may thus include white-space characters and all manner + * of padding. + * + * This function (source()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + * + */ + simdjson_inline std::string_view source() const noexcept; + + /** + * Returns the error of the stream (if any). + */ + inline error_code error() const noexcept; + + private: + simdjson_inline iterator(document_stream *s, bool finished) noexcept; + /** The document_stream we're iterating through. */ + document_stream* stream; + /** Whether we're finished or not. */ + bool finished; + + friend class document; + friend class document_stream; + friend class json_iterator; + }; + + /** + * Start iterating the documents in the stream. + */ + simdjson_inline iterator begin() noexcept; + /** + * The end of the stream, for iterator comparison purposes. + */ + simdjson_inline iterator end() noexcept; + +private: + + document_stream &operator=(const document_stream &) = delete; // Disallow copying + document_stream(const document_stream &other) = delete; // Disallow copying + + /** + * Construct a document_stream. Does not allocate or parse anything until the iterator is + * used.
+ * + * @param parser is a reference to the parser instance used to generate this document_stream + * @param buf is the raw byte buffer we need to process + * @param len is the length of the raw byte buffer in bytes + * @param batch_size is the size of the windows (must be strictly greater or equal to the largest JSON document) + */ + simdjson_inline document_stream( + ondemand::parser &parser, + const uint8_t *buf, + size_t len, + size_t batch_size, + bool allow_comma_separated + ) noexcept; + + /** + * Parse the first document in the buffer. Used by begin(), to handle allocation and + * initialization. + */ + inline void start() noexcept; + + /** + * Parse the next document found in the buffer previously given to document_stream. + * + * The content should be a valid JSON document encoded as UTF-8. If there is a + * UTF-8 BOM, the parser skips it. + * + * You do NOT need to pre-allocate a parser. This function takes care of + * pre-allocating a capacity defined by the batch_size defined when creating the + * document_stream object. + * + * The function returns simdjson::EMPTY if there is no more data to be parsed. + * + * The function returns simdjson::SUCCESS (as integer = 0) in case of success + * and indicates that the buffer has successfully been parsed to the end. + * Every document it contained has been parsed without error. + * + * The function returns an error code from simdjson/simdjson.h in case of failure + * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth; + * the simdjson::error_message function converts these error codes into a string). + * + * You can also check validity by calling parser.is_valid(). The same parser can + * and should be reused for the other documents in the buffer. + */ + inline void next() noexcept; + + /** Move the json_iterator of the document to the location of the next document in the stream. */ + inline void next_document() noexcept; + + /** Get the next document index. 
*/ + inline size_t next_batch_start() const noexcept; + + /** Pass the next batch through stage 1 with the given parser. */ + inline error_code run_stage1(ondemand::parser &p, size_t batch_start) noexcept; + + // Fields + ondemand::parser *parser; + const uint8_t *buf; + size_t len; + size_t batch_size; + bool allow_comma_separated; + /** + * We are going to use just one document instance. The document owns + * the json_iterator. It implies that we only ever pass a reference + * to the document to the users. + */ + document doc{}; + /** The error (or lack thereof) from the current document. */ + error_code error; + size_t batch_start{0}; + size_t doc_index{}; + + #ifdef SIMDJSON_THREADS_ENABLED + /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ + bool use_thread; + + inline void load_from_stage1_thread() noexcept; + + /** Start a thread to run stage 1 on the next batch. */ + inline void start_stage1_thread() noexcept; + + /** Wait for the stage 1 thread to finish and capture the results. */ + inline void finish_stage1_thread() noexcept; + + /** The error returned from the stage 1 thread. */ + error_code stage1_thread_error{UNINITIALIZED}; + /** The thread used to run stage 1 against the next batch in the background. */ + std::unique_ptr worker{new(std::nothrow) stage1_worker()}; + /** + * The parser used to run stage 1 in the background. Will be swapped + * with the regular parser when finished. 
+ */ + ondemand::parser stage1_thread_parser{}; + + friend struct stage1_worker; + #endif // SIMDJSON_THREADS_ENABLED + + friend class parser; + friend class document; + friend class json_iterator; + friend struct simdjson_result; + friend struct internal::simdjson_result_base; +}; // document_stream + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { +template<> +struct simdjson_result : public lasx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lasx::ondemand::document_stream &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H +/* end file simdjson/generic/ondemand/document_stream.h for lasx */ +/* including simdjson/generic/ondemand/field.h for lasx: #include "simdjson/generic/ondemand/field.h" */ +/* begin file simdjson/generic/ondemand/field.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +/** + * A JSON field (key/value pair) in an object. + * + * Returned from object iteration. + * + * Extends from std::pair so you can use C++ algorithms that rely on pairs. 
+ */ +class field : public std::pair { +public: + /** + * Create a new invalid field. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline field() noexcept; + + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + /** + * Get the key as a raw_json_string. Can be used for direct comparison with + * an unescaped C string: e.g., key() == "test". + */ + simdjson_inline raw_json_string key() const noexcept; + /** + * Get the unprocessed key as a string_view. This includes the quotes and may include + * some spaces after the last quote. + */ + simdjson_inline std::string_view key_raw_json_token() const noexcept; + /** + * Get the key as a string_view. This does not include the quotes and + * the string is unprocessed key so it may contain escape characters + * (e.g., \uXXXX or \n). Use unescaped_key() to get the unescaped key. + */ + simdjson_inline std::string_view escaped_key() const noexcept; + /** + * Get the field value. 
+ */ + simdjson_inline ondemand::value &value() & noexcept; + /** + * @overload ondemand::value &ondemand::value() & noexcept + */ + simdjson_inline ondemand::value value() && noexcept; + +protected: + simdjson_inline field(raw_json_string key, ondemand::value &&value) noexcept; + static simdjson_inline simdjson_result start(value_iterator &parent_iter) noexcept; + static simdjson_inline simdjson_result start(const value_iterator &parent_iter, raw_json_string key) noexcept; + friend struct simdjson_result; + friend class object_iterator; +}; + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public lasx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lasx::ondemand::field &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result key() noexcept; + simdjson_inline simdjson_result key_raw_json_token() noexcept; + simdjson_inline simdjson_result escaped_key() noexcept; + simdjson_inline simdjson_result value() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_H +/* end file simdjson/generic/ondemand/field.h for lasx */ +/* including simdjson/generic/ondemand/object.h for lasx: #include "simdjson/generic/ondemand/object.h" */ +/* begin file simdjson/generic/ondemand/object.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ 
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +/** + * A forward-only JSON object field iterator. + */ +class object { +public: + /** + * Create a new invalid object. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline object() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. 
You should access the value corresponding to a + * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. + * + * If you expect to have keys with escape characters, please review our documentation. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field was not there when they are not in order). + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. 
The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. + * + * If you expect to have keys with escape characters, please review our documentation. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. 
+ * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an object + * instance: there is no rewind and no invalidation. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that are trivially convertible to JSON Pointer queries: key + * names and array indices.
+ * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Reset the iterator so that we are pointing back at the + * beginning of the object. You should still consume values only once even if you + * can iterate through the object more than once. If you unescape a string within + * the object more than once, you have unsafe code. Note that rewinding an object + * means that you may need to reparse it anew: it is not a free operation. + * + * @returns true if the object contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * This method scans the beginning of the object and checks whether the + * object is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + inline simdjson_result is_empty() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. 
+ * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Consumes the object and returns a string_view instance corresponding to the + * object as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + +protected: + /** + * Go to the end of the object, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + static simdjson_inline object resume(const value_iterator &iter) noexcept; + simdjson_inline object(const value_iterator &iter) noexcept; + + simdjson_warn_unused simdjson_inline error_code find_field_raw(const std::string_view key) noexcept; + + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public lasx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lasx::ondemand::object &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + 
simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + + inline simdjson_result reset() noexcept; + inline simdjson_result is_empty() noexcept; + inline simdjson_result count_fields() & noexcept; + inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_H +/* end file simdjson/generic/ondemand/object.h for lasx */ +/* including simdjson/generic/ondemand/object_iterator.h for lasx: #include "simdjson/generic/ondemand/object_iterator.h" */ +/* begin file simdjson/generic/ondemand/object_iterator.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +class object_iterator { +public: + /** + * Create a new invalid object_iterator. + * + * Exists so you can declare a variable and later assign to it before use. 
+ */ + simdjson_inline object_iterator() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result operator*() noexcept; + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const object_iterator &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. + simdjson_inline bool operator!=(const object_iterator &) const noexcept; + // Checks for ']' and ',' + simdjson_inline object_iterator &operator++() noexcept; + +private: + /** + * The underlying JSON iterator. + * + * PERF NOTE: expected to be elided in favor of the parent document: this is set when the object + * is first used, and never changes afterwards. + */ + value_iterator iter{}; + + simdjson_inline object_iterator(const value_iterator &iter) noexcept; + friend struct simdjson_result; + friend class object; +}; + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public lasx::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(lasx::ondemand::object_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. 
+ simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + // Checks for ']' and ',' + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H +/* end file simdjson/generic/ondemand/object_iterator.h for lasx */ +/* including simdjson/generic/ondemand/serialization.h for lasx: #include "simdjson/generic/ondemand/serialization.h" */ +/* begin file simdjson/generic/ondemand/serialization.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Create a string-view instance out of a document instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(lasx::ondemand::document& x) noexcept; +/** + * Create a string-view instance out of a value instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. The value must + * not have been accessed previously. It does not + * validate the content. + */ +inline simdjson_result to_json_string(lasx::ondemand::value& x) noexcept; +/** + * Create a string-view instance out of an object instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(lasx::ondemand::object& x) noexcept; +/** + * Create a string-view instance out of an array instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. 
It does not + * validate the content. + */ +inline simdjson_result to_json_string(lasx::ondemand::array& x) noexcept; +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +} // namespace simdjson + +/** + * We want to support argument-dependent lookup (ADL). + * Hence we should define operator<< in the namespace + * where the argument (here value, object, etc.) resides. + * Credit: @madhur4127 + * See https://github.com/simdjson/simdjson/issues/1768 + */ +namespace simdjson { namespace lasx { namespace ondemand { + +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The element. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::value x); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::array value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The document. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::document& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::document_reference& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The object. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::object value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +}}} // namespace simdjson::lasx::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H +/* end file simdjson/generic/ondemand/serialization.h for lasx */ + +// Inline definitions +/* including simdjson/generic/ondemand/array-inl.h for lasx: #include "simdjson/generic/ondemand/array-inl.h" */ +/* begin file simdjson/generic/ondemand/array-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the array is first found and the iterator is just past the `[`. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the `,` before the next value (or `]`). In this state, +// depth == iter->depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter->depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the array iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an +// array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter->depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter->depth == depth, and at_start == false. +// +// ## Terminal State +// +// The terminal state has iter->depth < depth. at_start is always false.
+// +// - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this +// by decrementing depth. In this state, iter->depth < depth, at_start == false, and +// error == SUCCESS. +// + +simdjson_inline array::array(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { + // We don't need to know if the array is empty to start iteration, but we do want to know if there + // is an error--thus `simdjson_unused`. + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_root_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { + bool has_value; + SIMDJSON_TRY(iter.started_array().get(has_value)); + return array(iter); +} + +simdjson_inline simdjson_result array::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return array_iterator(iter); +} +simdjson_inline simdjson_result array::end() noexcept { + return array_iterator(iter); +} +simdjson_inline error_code array::consume() noexcept { + auto error = iter.json_iter().skip_child(iter.depth()-1); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result array::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. 
+ const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline simdjson_result array::count_elements() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the array after counting the number of elements. + iter.reset_array(); + return count; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline simdjson_result array::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_array().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +inline simdjson_result array::reset() & noexcept { + return iter.reset_array(); +} + +inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + // - means "the append position" or "the element after the end of the array" + // We don't support this, because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. 
If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + // Get the child + auto child = at(array_index); + // If there is an error, it ends here + if(child.error()) { + return child; + } + + // If there is a /, we're not done yet, call recursively. + if (i < json_pointer.length()) { + child = child.at_pointer(json_pointer.substr(i)); + } + return child; +} + +inline std::string json_path_to_pointer_conversion(std::string_view json_path) { + if (json_path.empty() || (json_path.front() != '.' && + json_path.front() != '[')) { + return "-1"; // This is just a sentinel value, the caller should check for this and return an error. + } + + std::string result; + // Reserve space to reduce allocations, adjusting for potential increases due + // to escaping. + result.reserve(json_path.size() * 2); + + size_t i = 0; + + while (i < json_path.length()) { + if (json_path[i] == '.') { + result += '/'; + } else if (json_path[i] == '[') { + result += '/'; + ++i; // Move past the '[' + while (i < json_path.length() && json_path[i] != ']') { + if (json_path[i] == '~') { + result += "~0"; + } else if (json_path[i] == '/') { + result += "~1"; + } else { + result += json_path[i]; + } + ++i; + } + if (i == json_path.length() || json_path[i] != ']') { + return "-1"; // Using sentinel value that will be handled as an error by the caller. 
+ } + } else { + if (json_path[i] == '~') { + result += "~0"; + } else if (json_path[i] == '/') { + result += "~1"; + } else { + result += json_path[i]; + } + } + ++i; + } + + return result; +} + +inline simdjson_result array::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { return INVALID_JSON_POINTER; } + return at_pointer(json_pointer); +} + +simdjson_inline simdjson_result array::at(size_t index) noexcept { + size_t i = 0; + for (auto value : *this) { + if (i == index) { return value; } + i++; + } + return INDEX_OUT_OF_BOUNDS; +} + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + lasx::ondemand::array &&value +) noexcept + : implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept + : implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { + if (error()) { return error(); } + return first.is_empty(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) 
noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H +/* end file simdjson/generic/ondemand/array-inl.h for lasx */ +/* including simdjson/generic/ondemand/array_iterator-inl.h for lasx: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/array_iterator-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result array_iterator::operator*() noexcept { + if (iter.error()) { iter.abandon(); return iter.error(); } + return value(iter.child()); +} +simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { + return iter.is_open(); +} +simdjson_inline array_iterator &array_iterator::operator++() noexcept { + error_code error; + // PERF NOTE this is a safety rail ... 
users should exit loops as soon as they receive an error, so we'll never get here. + // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. + if (( error = iter.error() )) { return *this; } + if (( error = iter.skip_child() )) { return *this; } + if (( error = iter.has_next_element().error() )) { return *this; } + return *this; +} + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + lasx::ondemand::array_iterator &&value +) noexcept + : lasx::implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : lasx::implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++(first); + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/array_iterator-inl.h for lasx */ +/* including simdjson/generic/ondemand/document-inl.h for lasx: #include "simdjson/generic/ondemand/document-inl.h" */ +/* begin file simdjson/generic/ondemand/document-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H + +/* amalgamation skipped (editor-only): #ifndef 
SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept + : iter{std::forward(_iter)} +{ + logger::log_start_value(iter, "document"); +} + +simdjson_inline document document::start(json_iterator &&iter) noexcept { + return document(std::forward(iter)); +} + +inline void document::rewind() noexcept { + iter.rewind(); +} + +inline std::string document::to_debug_string() noexcept { + return iter.to_string(); +} + +inline simdjson_result document::current_location() const noexcept { + return iter.current_location(); +} + +inline 
int32_t document::current_depth() const noexcept { + return iter.depth(); +} + +inline bool document::at_end() const noexcept { + return iter.at_end(); +} + + +inline bool document::is_alive() noexcept { + return iter.is_alive(); +} +simdjson_inline value_iterator document::resume_value_iterator() noexcept { + return value_iterator(&iter, 1, iter.root_position()); +} +simdjson_inline value_iterator document::get_root_value_iterator() noexcept { + return resume_value_iterator(); +} +simdjson_inline simdjson_result document::start_or_resume_object() noexcept { + if (iter.at_root()) { + return get_object(); + } else { + return object::resume(resume_value_iterator()); + } +} +simdjson_inline simdjson_result document::get_value() noexcept { + // Make sure we start any arrays or objects before returning, so that start_root_() + // gets called. + + // It is the convention throughout the code that the macro `SIMDJSON_DEVELOPMENT_CHECKS` determines whether + // we check for OUT_OF_ORDER_ITERATION. Proper on::demand code should never trigger this error. +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.at_root()) { return OUT_OF_ORDER_ITERATION; } +#endif + // assert_at_root() serves two purposes: in Debug mode, whether or not + // SIMDJSON_DEVELOPMENT_CHECKS is set or not, it checks that we are at the root of + // the document (this will typically be redundant). In release mode, it generates + // SIMDJSON_ASSUME statements to allow the compiler to make assumptions. + iter.assert_at_root(); + switch (*iter.peek()) { + case '[': { + // The following lines check that the document ends with ]. + auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_array(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + case '{': { + // The following lines would check that the document ends with }. 
+ auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_object(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + default: + // Unfortunately, scalar documents are a special case in simdjson and they cannot + // be safely converted to value instances. + return SCALAR_DOCUMENT_AS_VALUE; + } +} +simdjson_inline simdjson_result document::get_array() & noexcept { + auto value = get_root_value_iterator(); + return array::start_root(value); +} +simdjson_inline simdjson_result document::get_object() & noexcept { + auto value = get_root_value_iterator(); + return object::start_root(value); +} + +/** + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. We want to disallow trailing + * content. + * Thus, in several implementations below, we pass a 'true' parameter value to + * a get_root_value_iterator() method: this indicates that we disallow trailing content. 
+ */ + +simdjson_inline simdjson_result document::get_uint64() noexcept { + return get_root_value_iterator().get_root_uint64(true); +} +simdjson_inline simdjson_result document::get_uint64_in_string() noexcept { + return get_root_value_iterator().get_root_uint64_in_string(true); +} +simdjson_inline simdjson_result document::get_int64() noexcept { + return get_root_value_iterator().get_root_int64(true); +} +simdjson_inline simdjson_result document::get_int64_in_string() noexcept { + return get_root_value_iterator().get_root_int64_in_string(true); +} +simdjson_inline simdjson_result document::get_double() noexcept { + return get_root_value_iterator().get_root_double(true); +} +simdjson_inline simdjson_result document::get_double_in_string() noexcept { + return get_root_value_iterator().get_root_double_in_string(true); +} +simdjson_inline simdjson_result document::get_string(bool allow_replacement) noexcept { + return get_root_value_iterator().get_root_string(true, allow_replacement); +} +template +simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { + return get_root_value_iterator().get_root_string(receiver, true, allow_replacement); +} +simdjson_inline simdjson_result document::get_wobbly_string() noexcept { + return get_root_value_iterator().get_root_wobbly_string(true); +} +simdjson_inline simdjson_result document::get_raw_json_string() noexcept { + return get_root_value_iterator().get_root_raw_json_string(true); +} +simdjson_inline simdjson_result document::get_bool() noexcept { + return get_root_value_iterator().get_root_bool(true); +} +simdjson_inline simdjson_result document::is_null() noexcept { + return get_root_value_iterator().is_root_null(true); +} + +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document::get() & 
noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } + +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } + +template simdjson_inline error_code document::get(T &out) & noexcept { + return get().get(out); +} +template simdjson_inline error_code document::get(T &out) && noexcept { + return std::forward(*this).get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline document::operator T() noexcept(false) { return get(); } +simdjson_inline document::operator array() & noexcept(false) { return get_array(); } +simdjson_inline document::operator object() & noexcept(false) { return get_object(); } +simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } 
+simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document::operator double() noexcept(false) { return get_double(); } +simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document::operator value() noexcept(false) { return get_value(); } + +#endif +simdjson_inline simdjson_result document::count_elements() & noexcept { + auto a = get_array(); + simdjson_result answer = a.count_elements(); + /* If there was an array, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::count_fields() & noexcept { + auto a = get_object(); + simdjson_result answer = a.count_fields(); + /* If there was an object, we are now left pointing at its first element. 
*/ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::at(size_t index) & noexcept { + auto a = get_array(); + return a.at(index); +} +simdjson_inline simdjson_result document::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result document::end() & noexcept { + return {}; +} + +simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field(const char *key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline error_code document::consume() noexcept { + auto error = iter.skip_child(0); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result document::raw_json() noexcept { + auto _iter = get_root_value_iterator(); + const uint8_t * starting_point{_iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. 
+ const uint8_t * final_point{iter.unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result document::type() noexcept { + return get_root_value_iterator().type(); +} + +simdjson_inline simdjson_result document::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline simdjson_result document::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + +simdjson_inline bool document::is_negative() noexcept { + return get_root_value_iterator().is_root_negative(); +} + +simdjson_inline simdjson_result document::is_integer() noexcept { + return get_root_value_iterator().is_root_integer(true); +} + +simdjson_inline simdjson_result document::get_number_type() noexcept { + return get_root_value_iterator().get_root_number_type(true); +} + +simdjson_inline simdjson_result document::get_number() noexcept { + return get_root_value_iterator().get_root_number(true); +} + + +simdjson_inline simdjson_result document::raw_json_token() noexcept { + auto _iter = get_root_value_iterator(); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); +} + +simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_pointer.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result 
document::at_path(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_path is called + if (json_path.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + lasx::ondemand::document &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base( + error + ) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return
first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return 
first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first); +} +template<> simdjson_inline error_code simdjson_result::get(lasx::ondemand::document &out) & noexcept = delete; +template<> simdjson_inline error_code 
simdjson_result::get(lasx::ondemand::document &out) && noexcept { + if (error()) { return error(); } + out = std::forward(first); + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} + +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} + +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} + +simdjson_inline bool simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} + +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} + +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} + +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} + + +#if SIMDJSON_EXCEPTIONS +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lasx::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lasx::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { 
+ if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lasx::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lasx::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline bool simdjson_result::at_end() const noexcept { + if (error()) { return error(); } + return first.at_end(); +} + + +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} + +} // namespace simdjson + + +namespace simdjson { +namespace lasx { +namespace ondemand { + +simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} +simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} +simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } +simdjson_inline 
simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } +simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } +/** + * The document_reference instances are used primarily/solely for streams of JSON + * documents. + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. + * + * However, for streams of JSON documents, we want to be able to start from + * "321" "321" "321" + * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() + * successfully each time. + * + * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: + * this indicates that we allow trailing content. + */ +simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } +simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } +simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } +template +simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) 
noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } +simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } +simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } +simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } +simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } +simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_value(); } +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline document_reference::operator T() noexcept(false) { return get(); } +simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } +simdjson_inline 
document_reference::operator object() & noexcept(false) { return object(*doc); } +simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } +simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } +#endif +simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } +simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } +simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } +simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } +simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } +simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result 
document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } +simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } +simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } +simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } +simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } +simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } +simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } +simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } +simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } +simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } +simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} +simdjson_inline document_reference::operator document&() const noexcept { return *doc; } + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + + + +namespace simdjson { +simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::document_reference value, error_code error) + noexcept : 
implementation_simdjson_result_base(std::forward(value), error) {} + + +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return 
first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if 
(error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lasx::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lasx::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline 
simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lasx::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lasx::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H +/* end file simdjson/generic/ondemand/document-inl.h for lasx */ +/* including simdjson/generic/ondemand/document_stream-inl.h for lasx: #include "simdjson/generic/ondemand/document_stream-inl.h" */ +/* begin file simdjson/generic/ondemand/document_stream-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ +/* 
amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace lasx { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. + std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); +} + +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); +} + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). 
+ if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); +} + + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } +} + +inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); +} + +#endif // SIMDJSON_THREADS_ENABLED + +simdjson_inline document_stream::document_stream( + ondemand::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size, + bool _allow_comma_separated +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? 
MINIMAL_BATCH_SIZE : _batch_size}, + allow_comma_separated{_allow_comma_separated}, + error{SUCCESS} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change + #endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif +} + +simdjson_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + allow_comma_separated{false}, + error{UNINITIALIZED} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) + #endif +{ +} + +simdjson_inline document_stream::~document_stream() noexcept +{ + #ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); + #endif +} + +inline size_t document_stream::size_in_bytes() const noexcept { + return len; +} + +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +} + +simdjson_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { +} + +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { +} + +simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { + //if(stream->error) { return stream->error; } + return simdjson_result(stream->doc, stream->error); +} + +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) 
+ // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. + // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->error() is guarded against error conditions + // (it will immediately return if stream->error casts to false). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; +} + +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; +} + +simdjson_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. 
+ return iterator(this, error == EMPTY); +} + +simdjson_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); +} + +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->allocate(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } + doc_index = batch_start; + doc = document(json_iterator(&buf[batch_start], parser)); + doc.iter._streaming = true; + + #ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread on next batch if needed + error = stage1_thread_parser.allocate(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } + #endif // SIMDJSON_THREADS_ENABLED +} + +inline void document_stream::next() noexcept { + // We always enter at once once in an error condition. + if (error) { return; } + next_document(); + if (error) { return; } + auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); + doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; + + // Check if at end of structural indexes (i.e. 
at end of batch) + if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { + error = EMPTY; + // Load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + #ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } + #else + error = run_stage1(*parser, batch_start); + #endif + /** + * Whenever we move to another window, we need to update all pointers to make + * it appear as if the input buffer started at the beginning of the window. + * + * Take this input: + * + * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] + * + * Say you process the following window... + * + * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' + * + * When you do so, the json_iterator has a pointer at the beginning of the memory region + * (pointing at the beginning of '{"z"...'. + * + * When you move to the window that starts at... + * + * '[7, 10, 9] [15, 11, 12, 13] ... + * + * then it is not sufficient to just run stage 1. You also need to re-anchor the + * json_iterator so that it believes we are starting at '[7, 10, 9]...'. + * + * Under the DOM front-end, this gets done automatically because the parser owns + * the pointer the data, and when you call stage1 and then stage2 on the same + * parser, then stage2 will run on the pointer acquired by stage1. + * + * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that + * we used. But json_iterator has no callback when stage1 is called on the parser. + * In fact, I think that the parser is unaware of json_iterator. + * + * + * So we need to re-anchor the json_iterator after each call to stage 1 so that + * all of the pointers are in sync. + */ + doc.iter = json_iterator(&buf[batch_start], parser); + doc.iter._streaming = true; + /** + * End of resync. 
+ */ + + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. + doc_index = batch_start; + } + } +} + +inline void document_stream::next_document() noexcept { + // Go to next place where depth=0 (document depth) + error = doc.iter.skip_child(0); + if (error) { return; } + // Always set depth=1 at the start of document + doc.iter._depth = 1; + // consume comma if comma separated is allowed + if (allow_comma_separated) { doc.iter.consume_character(','); } + // Resets the string buffer at the beginning, thus invalidating the strings. + doc.iter._string_buf_loc = parser->string_buf.get(); + doc.iter._root = doc.iter.position(); +} + +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; +} + +inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { + // This code only updates the structural index in the parser, it does not update any json_iterator + // instance. 
+ size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); + } +} + +simdjson_inline size_t document_stream::iterator::current_index() const noexcept { + return stream->doc_index; +} + +simdjson_inline std::string_view document_stream::iterator::source() const noexcept { + auto depth = stream->doc.iter.depth(); + auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); + + // If at root, process the first token to determine if scalar value + if (stream->doc.iter.at_root()) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': // Depth=1 already at start of document + break; + case '}': case ']': + depth--; + break; + default: // Scalar value document + // TODO: We could remove trailing whitespaces + // This returns a string spanning from start of value to the beginning of the next document (excluded) + { + auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; + // normally the length would be next_index - current_index() - 1, except for the last document + size_t svlen = next_index - current_index(); + const char *start = reinterpret_cast(stream->buf) + current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { + svlen--; + } + return std::string_view(start, svlen); + } + } + cur_struct_index++; + } + + while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': + depth++; + break; + case '}': case ']': + depth--; + break; + } + if (depth == 0) { break; } + 
cur_struct_index++; + } + + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; +} + +inline error_code document_stream::iterator::error() const noexcept { + return stream->error; +} + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(stage1_thread_parser,*parser); + error = stage1_thread_error; + if (error) { return; } + + // If there's anything left, start the stage 1 thread! + if (next_batch_start() < len) { + start_stage1_thread(); + } +} + +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. 
+ this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); + + worker->run(this, & this->stage1_thread_parser, _next_batch_start); +} + +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result::simdjson_result( + lasx::ondemand::document_stream &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} + +} + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H +/* end file simdjson/generic/ondemand/document_stream-inl.h for lasx */ +/* including simdjson/generic/ondemand/field-inl.h for lasx: #include "simdjson/generic/ondemand/field-inl.h" */ +/* begin file simdjson/generic/ondemand/field-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +// clang 6 does not think the default constructor can be noexcept, so we make it explicit +simdjson_inline field::field() noexcept : std::pair() {} + +simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept + : std::pair(key, 
std::forward(value)) +{ +} + +simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { + raw_json_string key; + SIMDJSON_TRY( parent_iter.field_key().get(key) ); + SIMDJSON_TRY( parent_iter.field_value() ); + return field::start(parent_iter, key); +} + +simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { + return field(key, parent_iter.child()); +} + +simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); + first.consume(); + return answer; +} + +simdjson_inline raw_json_string field::key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return first; +} + + +simdjson_inline std::string_view field::key_raw_json_token() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return std::string_view(reinterpret_cast(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1); +} + +simdjson_inline std::string_view field::escaped_key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. 
+ auto end_quote = second.iter._json_iter->token.peek(-1); + while(*end_quote != '"') end_quote--; + return std::string_view(reinterpret_cast(first.buf), end_quote - first.buf); +} + +simdjson_inline value &field::value() & noexcept { + return second; +} + +simdjson_inline value field::value() && noexcept { + return std::forward(*this).second; +} + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + lasx::ondemand::field &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::key() noexcept { + if (error()) { return error(); } + return first.key(); +} + +simdjson_inline simdjson_result simdjson_result::key_raw_json_token() noexcept { + if (error()) { return error(); } + return first.key_raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::escaped_key() noexcept { + if (error()) { return error(); } + return first.escaped_key(); +} + +simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(allow_replacement); +} + +simdjson_inline simdjson_result simdjson_result::value() noexcept { + if (error()) { return error(); } + return std::move(first.value()); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +/* end file simdjson/generic/ondemand/field-inl.h for lasx */ +/* including simdjson/generic/ondemand/json_iterator-inl.h for lasx: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/json_iterator-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ 
+/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */
+/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace lasx {
+namespace ondemand {
+
+// Move constructor: takes over every field of `other` (token, parser, string
+// buffer location, error, depth, root position, streaming flag) and then
+// clears `other.parser`, so the moved-from iterator no longer refers to a parser.
+// NOTE(review): std::forward needs an explicit template argument; it appears to
+// have been lost in this copy of the patch -- confirm against upstream simdjson.
+simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept
+  : token(std::forward(other.token)),
+    parser{other.parser},
+    _string_buf_loc{other._string_buf_loc},
+    error{other.error},
+    _depth{other._depth},
+    _root{other._root},
+    _streaming{other._streaming}
+{
+  other.parser = nullptr;
+}
+// Move assignment: copies each field from `other` into *this (note that `token`
+// is copy-assigned, not moved), clears `other.parser`, and returns *this.
+// NOTE(review): there is no self-assignment guard; if `&other == this`, the
+// final `other.parser = nullptr` also clears this->parser -- confirm that
+// callers never self-move-assign.
+simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept {
+  token = other.token;
+  parser = other.parser;
+  _string_buf_loc = other._string_buf_loc;
+  error = other.error;
+  _depth = other._depth;
+  _root = other._root;
+  _streaming = other._streaming;
+  other.parser = nullptr;
+  return *this;
+}
+
+// Builds an iterator over `buf` using the structural indexes already held by
+// `_parser`:
+//  - the token iterator starts at the first structural index,
+//  - the string buffer location starts at the beginning of the parser's string_buf,
+//  - depth starts at 1,
+//  - the root position is the start of the structural index array,
+//  - streaming is off.
+// `error` is not listed here, so it keeps whatever default the class declares.
+simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept
+  : token(buf, &_parser->implementation->structural_indexes[0]),
+    parser{_parser},
+    _string_buf_loc{parser->string_buf.get()},
+    _depth{1},
+    _root{parser->implementation->structural_indexes.get()},
+    _streaming{false}
+
+{
+  logger::log_headers();
+  // Only when EOF checking is compiled in: assert that tokens are available.
+#if SIMDJSON_CHECK_EOF
+  assert_more_tokens();
+#endif
+}
+
+// Same as the two-argument constructor above, except that the caller chooses the
+// `_streaming` flag. Compiled only when SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON
+// is defined.
+#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON
+simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept
+  : token(buf, &_parser->implementation->structural_indexes[0]),
+    parser{_parser},
+    _string_buf_loc{parser->string_buf.get()},
+    _depth{1},
+    _root{parser->implementation->structural_indexes.get()},
+    _streaming{streaming}
+
+{
+  logger::log_headers();
+#if SIMDJSON_CHECK_EOF
+  assert_more_tokens();
+#endif
+}
+#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON
+
+// Puts the iterator back at the root position: the token position is reset to
+// root_position(), the string buffer location goes back to the start of the
+// parser's string_buf, and depth is reset to 1. `error`, `_root` and
+// `_streaming` are left untouched.
+inline void json_iterator::rewind() noexcept {
+  token.set_position( root_position() );
+  logger::log_headers(); // We start again
+  _string_buf_loc = parser->string_buf.get();
+  _depth = 1;
+}
+
+// Returns true when opening brackets/braces and closing brackets/braces occur
+// equally often between the root position and peek_last() (inclusive).
+// Works on a copy of the token iterator, so the position of *this is unchanged.
+// Only the counts are compared: '[' vs '{' pairing and nesting order are not checked.
+inline bool json_iterator::balanced() const noexcept {
+  token_iterator ti(token);
+  int32_t count{0};
+  ti.set_position( root_position() );
+  while(ti.peek() <= peek_last()) {
+    switch (*ti.return_current_and_advance())
+    {
+    case '[': case '{':
+      count++;
+      break;
+    case ']': case '}':
+      count--;
+      break;
+    default:
+      // Any other structural character does not affect the balance.
+      break;
+    }
+  }
+  return count == 0;
+}
+
+
+// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller
+// relating depth and parent_depth, which is a desired effect. (The warning does not show up if the
+// skip_child() function is not marked inline.)
+SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { + if (depth() <= parent_depth) { return SUCCESS; } + switch (*return_current_and_advance()) { + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + + // For the first open array/object in a value, we've already incremented depth, so keep it the same + // We never stop at colon, but if we did, it wouldn't affect depth + case '[': case '{': case ':': + logger::log_start_value(*this, "skip"); + break; + // If there is a comma, we have just finished a value in an array/object, and need to get back in + case ',': + logger::log_value(*this, "skip"); + break; + // ] or } means we just finished a value and need to jump out of the array/object + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } +#if SIMDJSON_CHECK_EOF + // If there are no more tokens, the parent is incomplete. + if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + break; + case '"': + if(*peek() == ':') { + // We are at a key!!! + // This might happen if you just started an object and you skip it immediately. + // Performance note: it would be nice to get rid of this check as it is somewhat + // expensive. + // https://github.com/simdjson/simdjson/issues/1742 + logger::log_value(*this, "key"); + return_current_and_advance(); // eat up the ':' + break; // important!!! 
+ } + simdjson_fallthrough; + // Anything else must be a scalar value + default: + // For the first scalar, we will have incremented depth already, so we decrement it here. + logger::log_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + } + + // Now that we've considered the first value, we only increment/decrement for arrays/objects + while (position() < end_position()) { + switch (*return_current_and_advance()) { + case '[': case '{': + logger::log_start_value(*this, "skip"); + _depth++; + break; + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + default: + logger::log_value(*this, "skip", ""); + break; + } + } + + return report_error(TAPE_ERROR, "not enough close braces"); +} + +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool json_iterator::at_root() const noexcept { + return position() == root_position(); +} + +simdjson_inline bool json_iterator::is_single_token() const noexcept { + return parser->implementation->n_structural_indexes == 1; +} + +simdjson_inline bool json_iterator::streaming() const noexcept { + return _streaming; +} + +simdjson_inline token_position json_iterator::root_position() const noexcept { + return _root; +} + +simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +} + +simdjson_inline void json_iterator::assert_at_root() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + // Under Visual Studio, the next 
SIMDJSON_ASSUME fails with: the argument + // has side effects that will be discarded. + SIMDJSON_ASSUME( token.position() == _root ); +#endif +} + +simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { + assert_valid_position(token._position + required_tokens - 1); +} + +simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); + SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#endif +} + +simdjson_inline bool json_iterator::at_end() const noexcept { + return position() == end_position(); +} +simdjson_inline token_position json_iterator::end_position() const noexcept { + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + return &parser->implementation->structural_indexes[n_structural_indexes]; +} + +inline std::string json_iterator::to_string() const noexcept { + if( !is_alive() ) { return "dead json_iterator instance"; } + const char * current_structural = reinterpret_cast(token.peek()); + return std::string("json_iterator [ depth : ") + std::to_string(_depth) + + std::string(", structural : '") + std::string(current_structural,1) + + std::string("', offset : ") + std::to_string(token.current_offset()) + + std::string("', error : ") + error_message(error) + + std::string(" ]"); +} + +inline simdjson_result json_iterator::current_location() const noexcept { + if (!is_alive()) { // Unrecoverable error + if (!at_root()) { + return reinterpret_cast(token.peek(-1)); + } else { + return reinterpret_cast(token.peek()); + } + } + if (at_end()) { + return OUT_OF_BOUNDS; + } + return reinterpret_cast(token.peek()); +} + +simdjson_inline bool json_iterator::is_alive() const noexcept { + return parser; +} + +simdjson_inline void json_iterator::abandon() noexcept { + parser 
= nullptr; + _depth = 0; +} + +simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif // SIMDJSON_CHECK_EOF + return token.return_current_and_advance(); +} + +simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { + // deliberately done without safety guard: + return token.peek(); +} + +simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // SIMDJSON_CHECK_EOF + return token.peek(delta); +} + +simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // #if SIMDJSON_CHECK_EOF + return token.peek_length(delta); +} + +simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { + // todo: currently we require end-of-string buffering, but the following + // assert_valid_position should be turned on if/when we lift that condition. + // assert_valid_position(position); + // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. + return token.peek(position); +} + +simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_length(position); +} +simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_root_length(position); +} + +simdjson_inline token_position json_iterator::last_position() const noexcept { + // The following line fails under some compilers... 
+ // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); + // since it has side-effects. + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + SIMDJSON_ASSUME(n_structural_indexes > 0); + return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +} +simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { + return token.peek(last_position()); +} + +simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { + SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); + SIMDJSON_ASSUME(_depth == parent_depth + 1); + _depth = parent_depth; +} + +simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); + _depth = child_depth; +} + +simdjson_inline depth_t json_iterator::depth() const noexcept { + return _depth; +} + +simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { + return _string_buf_loc; +} + +simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); + logger::log_error(*this, message); + error = _error; + return error; +} + +simdjson_inline token_position json_iterator::position() const noexcept { + return token.position(); +} + +simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { + return parser->unescape(in, _string_buf_loc, allow_replacement); +} + +simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { + return parser->unescape_wobbly(in, _string_buf_loc); +} + +simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + 
SIMDJSON_ASSUME(_depth == child_depth - 1); +#if SIMDJSON_DEVELOPMENT_CHECKS +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); + SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); +#endif +#endif + token.set_position(position); + _depth = child_depth; +} + +simdjson_inline error_code json_iterator::consume_character(char c) noexcept { + if (*peek() == c) { + return_current_and_advance(); + return SUCCESS; + } + return TAPE_ERROR; +} + +#if SIMDJSON_DEVELOPMENT_CHECKS + +simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; +} + +simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } +} + +#endif + + +simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); + logger::log_error(*this, message); + return _error; +} + + +simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { + // This function is not expected to be called in performance-sensitive settings. + // Let us guard against silly cases: + if((N < max_len) || (N == 0)) { return false; } + // Copy to the buffer. + std::memcpy(tmpbuf, json, max_len); + if(N > max_len) { // We pad whatever remains with ' '. 
+ std::memset(tmpbuf + max_len, ' ', N - max_len); + } + return true; +} + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/json_iterator-inl.h for lasx */ +/* including simdjson/generic/ondemand/json_type-inl.h for lasx: #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* begin file simdjson/generic/ondemand/json_type-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { + switch (type) { + case json_type::array: out << "array"; break; + case json_type::object: out << "object"; break; + case json_type::number: out << "number"; break; + case json_type::string: out << "string"; break; + case json_type::boolean: out << "boolean"; break; + case json_type::null: out << "null"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& 
out, simdjson_result &type) noexcept(false) { + return out << type.value(); +} +#endif + + + +simdjson_inline number_type number::get_number_type() const noexcept { + return type; +} + +simdjson_inline bool number::is_uint64() const noexcept { + return get_number_type() == number_type::unsigned_integer; +} + +simdjson_inline uint64_t number::get_uint64() const noexcept { + return payload.unsigned_integer; +} + +simdjson_inline number::operator uint64_t() const noexcept { + return get_uint64(); +} + +simdjson_inline bool number::is_int64() const noexcept { + return get_number_type() == number_type::signed_integer; +} + +simdjson_inline int64_t number::get_int64() const noexcept { + return payload.signed_integer; +} + +simdjson_inline number::operator int64_t() const noexcept { + return get_int64(); +} + +simdjson_inline bool number::is_double() const noexcept { + return get_number_type() == number_type::floating_point_number; +} + +simdjson_inline double number::get_double() const noexcept { + return payload.floating_point_number; +} + +simdjson_inline number::operator double() const noexcept { + return get_double(); +} + +simdjson_inline double number::as_double() const noexcept { + if(is_double()) { + return payload.floating_point_number; + } + if(is_int64()) { + return double(payload.signed_integer); + } + return double(payload.unsigned_integer); +} + +simdjson_inline void number::append_s64(int64_t value) noexcept { + payload.signed_integer = value; + type = number_type::signed_integer; +} + +simdjson_inline void number::append_u64(uint64_t value) noexcept { + payload.unsigned_integer = value; + type = number_type::unsigned_integer; +} + +simdjson_inline void number::append_double(double value) noexcept { + payload.floating_point_number = value; + type = number_type::floating_point_number; +} + +simdjson_inline void number::skip_double() noexcept { + type = number_type::floating_point_number; +} + +} // namespace ondemand +} // namespace lasx +} // namespace 
simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +/* end file simdjson/generic/ondemand/json_type-inl.h for lasx */ +/* including simdjson/generic/ondemand/logger-inl.h for lasx: #include "simdjson/generic/ondemand/logger-inl.h" */ +/* begin file simdjson/generic/ondemand/logger-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace lasx { +namespace ondemand { +namespace logger { + +static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +static constexpr const int LOG_EVENT_LEN = 20; +static constexpr const int LOG_BUFFER_LEN = 30; +static constexpr const int LOG_SMALL_BUFFER_LEN = 10; +static int log_depth = 0; // Not threadsafe. Log only. 
+ +// Helper to turn unprintable or newline characters into spaces +static inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } +} + +template +static inline std::string string_format(const std::string& format, const Args&... args) +{ + SIMDJSON_PUSH_DISABLE_ALL_WARNINGS + int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; + auto size = static_cast(size_s); + if (size <= 0) return std::string(); + std::unique_ptr buf(new char[size]); + std::snprintf(buf.get(), size, format.c_str(), args...); + SIMDJSON_POP_DISABLE_WARNINGS + return std::string(buf.get(), buf.get() + size - 1); +} + +static inline log_level get_log_level_from_env() +{ + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + char *lvl = getenv("SIMDJSON_LOG_LEVEL"); + SIMDJSON_POP_DISABLE_WARNINGS + if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } + return log_level::info; +} + +static inline log_level log_threshold() +{ + static log_level threshold = get_log_level_from_env(); + return threshold; +} + +static inline bool should_log(log_level level) +{ + return level >= log_threshold(); +} + +inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +} + +inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "", type, detail, log_level::info); +} +inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +} + +inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, 
std::string_view detail) noexcept { + log_line(iter, index, depth, "+", type, detail, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} +inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_line(iter, "+", type, "", delta, depth_delta, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} + +inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + if (LOG_ENABLED) { log_depth--; } + log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +} + +inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); +} +inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { + log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); +} + +inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_event(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_value(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_start_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_end_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_error(iter.json_iter(), error, detail, delta, depth_delta); +} + +inline void 
log_headers() noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(log_level::info))) { + // Technically a static variable is not thread-safe, but if you are using threads and logging... well... + static bool displayed_hint{false}; + log_depth = 0; + printf("\n"); + if (!displayed_hint) { + // We only print this helpful header once. + printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); + printf("# +array says 'this is where we were when we discovered the start array'\n"); + printf( + "# -array says 'this is where we were when we ended the array'\n"); + printf("# skip says 'this is a structural or value I am skipping'\n"); + printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); + printf("#\n"); + printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); + printf("# in addition to the depth being displayed.\n"); + printf("#\n"); + printf("# Every token in the document has a single depth determined by the tokens before it,\n"); + printf("# and is not affected by what the token actually is.\n"); + printf("#\n"); + printf("# Not all structural elements are presented as tokens in the logs.\n"); + printf("#\n"); + printf("# We never give control to the user within an empty array or an empty object.\n"); + printf("#\n"); + printf("# Inside an array, having a depth greater than the array's depth means that\n"); + printf("# we are pointing inside a value.\n"); + printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); + printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); + displayed_hint = true; + } + printf("\n"); + printf("| %-*s ", LOG_EVENT_LEN, "Event"); + printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); + printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); + // printf("| %-*s ", 5, "Next#"); + printf("| %-*s ", 5, "Depth"); + printf("| Detail "); + printf("|\n"); + + printf("|%.*s", 
LOG_EVENT_LEN + 2, DASHES); + printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); + printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); + // printf("|%.*s", 5+2, DASHES); + printf("|%.*s", 5 + 2, DASHES); + printf("|--------"); + printf("|\n"); + fflush(stdout); + } + } +} + +template +inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { + log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); +} + +template +inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(level))) { + const int indent = depth * 2; + const auto buf = iter.token.buf; + auto msg = string_format(title, std::forward(args)...); + printf("| %*s%s%-*s ", indent, "", title_prefix, + LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); + { + // Print the current structural. + printf("| "); + // Before we begin, the index might point right before the document. + // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 + if (index < iter._root) { + printf("%*s", LOG_BUFFER_LEN, ""); + } else { + auto current_structural = &buf[*index]; + for (int i = 0; i < LOG_BUFFER_LEN; i++) { + printf("%c", printable_char(current_structural[i])); + } + } + printf(" "); + } + { + // Print the next structural. 
+ printf("| "); + auto next_structural = &buf[*(index + 1)]; + for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { + printf("%c", printable_char(next_structural[i])); + } + printf(" "); + } + // printf("| %5u ", *(index+1)); + printf("| %5i ", depth); + printf("| %6.*s ", int(detail.size()), detail.data()); + printf("|\n"); + fflush(stdout); + } + } +} + +} // namespace logger +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +/* end file simdjson/generic/ondemand/logger-inl.h for lasx */ +/* including simdjson/generic/ondemand/object-inl.h for lasx: #include "simdjson/generic/ondemand/object-inl.h" */ +/* begin file simdjson/generic/ondemand/object-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + 
logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { + return find_field_unordered(key); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { + return std::forward(*this).find_field_unordered(key); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} + +simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_object().error() ); + return object(iter); +} +simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_root_object().error() ); 
+ return object(iter); +} +simdjson_inline error_code object::consume() noexcept { + if(iter.is_at_key()) { + /** + * whenever you are pointing at a key, calling skip_child() is + * unsafe because you will hit a string and you will assume that + * it is string value, and this mistake will lead you to make bad + * depth computation. + */ + /** + * We want to 'consume' the key. We could really + * just do _json_iter->return_current_and_advance(); at this + * point, but, for clarity, we will use the high-level API to + * eat the key. We assume that the compiler optimizes away + * most of the work. + */ + simdjson_unused raw_json_string actual_key; + auto error = iter.field_key().get(actual_key); + if (error) { iter.abandon(); return error; }; + // Let us move to the value while we are at it. + if ((error = iter.field_value())) { iter.abandon(); return error; } + } + auto error_skip = iter.json_iter().skip_child(iter.depth()-1); + if(error_skip) { iter.abandon(); } + return error_skip; +} + +simdjson_inline simdjson_result object::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.started_object().error() ); + return object(iter); +} + +simdjson_inline object object::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline object::object(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result object::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return object_iterator(iter); +} +simdjson_inline simdjson_result object::end() noexcept { + return object_iterator(iter); +} + +inline 
simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = find_field(unescaped); // Take note find_field does not unescape keys when matching + } else { + child = find_field(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; +} + +inline simdjson_result object::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { + return INVALID_JSON_POINTER; + } + return at_pointer(json_pointer); +} + +simdjson_inline simdjson_result object::count_fields() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. 
+ if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the object after counting the number of elements. + iter.reset_object(); + return count; +} + +simdjson_inline simdjson_result object::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_object().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +simdjson_inline simdjson_result object::reset() & noexcept { + return iter.reset_object(); +} + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first)[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return 
error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +inline simdjson_result simdjson_result::reset() noexcept { + if (error()) { return error(); } + return first.reset(); +} + +inline simdjson_result simdjson_result::is_empty() noexcept { + if (error()) { return error(); } + return first.is_empty(); +} + +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +/* end file simdjson/generic/ondemand/object-inl.h for lasx */ +/* including simdjson/generic/ondemand/object_iterator-inl.h for lasx: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/object_iterator-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ 
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +// +// object_iterator +// + +simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result object_iterator::operator*() noexcept { + error_code error = iter.error(); + if (error) { iter.abandon(); return error; } + auto result = field::start(iter); + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (result.error()) { iter.abandon(); } + return result; +} +simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { + return iter.is_open(); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline object_iterator &object_iterator::operator++() noexcept { + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error + + simdjson_unused error_code error; + if ((error = iter.skip_child() )) { return *this; } + + simdjson_unused bool has_value; + if ((error = iter.has_next_field().get(has_value) )) { return *this; }; + return *this; +} +SIMDJSON_POP_DISABLE_WARNINGS + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter.depth. at_start may vary. 
Error is +// always SUCCESS: +// +// - Start: This is the state when the object is first found and the iterator is just past the {. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the , or } before the next value. In this state, +// depth == iter.depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter.depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the object iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an +// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter.depth == depth, and at_start == false. +// +// Errors that occur while reading a field to give to the user (such as when the key is not a +// string or the field is missing a colon) are yielded immediately. Depth is then decremented, +// moving to the Finished state without transitioning through an Error state at all. +// +// ## Terminal State +// +// The terminal state has iter.depth < depth. at_start is always false. 
+// +// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. +// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. +// + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + lasx::ondemand::object_iterator &&value +) noexcept + : implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +// If we're iterating and there is an error, return the error once. 
+simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +// Checks for ']' and ',' +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++first; + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/object_iterator-inl.h for lasx */ +/* including simdjson/generic/ondemand/parser-inl.h for lasx: #include "simdjson/generic/ondemand/parser-inl.h" */ +/* begin file simdjson/generic/ondemand/parser-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +simdjson_inline 
parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity} { +} + +simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + if (new_capacity > max_capacity()) { return CAPACITY; } + if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + + // string_capacity copied from document::allocate + _capacity = 0; + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); +#if SIMDJSON_DEVELOPMENT_CHECKS + start_positions.reset(new (std::nothrow) token_position[new_max_depth]); +#endif + if (implementation) { + SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); + SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + } else { + SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); + } + _capacity = new_capacity; + _max_depth = new_max_depth; + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. 
+ SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return document::start({ reinterpret_cast(json.data()), this }); +} + +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; + } + return document::start({ reinterpret_cast(json.data()), this, true }); +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { + return iterate(padded_string_view(json, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { + if(json.capacity() - json.size() < SIMDJSON_PADDING) { + json.reserve(json.size() + SIMDJSON_PADDING); + } + return iterate(padded_string_view(json)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { + return iterate(padded_string_view(json)); +} + +simdjson_warn_unused simdjson_inline simdjson_result 
parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + padded_string_view json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + const padded_string &json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length()) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return json_iterator(reinterpret_cast(json.data()), this); +} + +inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { + buf += 3; + len -= 3; + } + if(allow_comma_separated && batch_size < len) { batch_size = len; } + return document_stream(*this, buf, len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return 
iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} + +simdjson_inline size_t parser::capacity() const noexcept { + return _capacity; +} +simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_inline size_t parser::max_depth() const noexcept { + return _max_depth; +} + +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; + } +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { + uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { + uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H +/* end file simdjson/generic/ondemand/parser-inl.h for lasx */ +/* 
including simdjson/generic/ondemand/raw_json_string-inl.h for lasx: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +namespace lasx { +namespace ondemand { + +simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} + +simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } + + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;pos < target.size() && target[pos] != '\\';pos++) {} + // slow path may begin. + bool escaping{false}; + for(;pos < target.size();pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;target[pos] && target[pos] != '\\';pos++) {} + // slow path may begin. 
+ bool escaping{false}; + for(;target[pos];pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { + // If we are going to call memcmp, then we must know something about the length of the raw_json_string. + return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); +} + +simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + if(target.size() <= SIMDJSON_PADDING) { + return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + } + const char * r{raw()}; + size_t pos{0}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. 
+ return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { + // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. 
+ return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { + return a.unsafe_is_equal(c); +} + +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { + return a == c; +} + +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { + return !(a == c); +} + +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { + return !(a == c); +} + + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { + return iter.unescape(*this, allow_replacement); +} + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { + return iter.unescape_wobbly(*this); +} + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { + bool in_escape = false; + const char *s = str.raw(); + while (true) { + switch (*s) { + case '\\': in_escape = !in_escape; break; + case '"': if (in_escape) { in_escape = false; } else { return out; } break; + default: if (in_escape) { in_escape = false; } + } + out << *s; + s++; + } +} + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { + if (error()) { return error(); } + return first.raw(); 
+} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(lasx::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { + if (error()) { return error(); } + return first.unescape(iter, allow_replacement); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(lasx::ondemand::json_iterator &iter) const noexcept { + if (error()) { return error(); } + return first.unescape_wobbly(iter); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H +/* end file simdjson/generic/ondemand/raw_json_string-inl.h for lasx */ +/* including simdjson/generic/ondemand/serialization-inl.h for lasx: #include "simdjson/generic/ondemand/serialization-inl.h" */ +/* begin file simdjson/generic/ondemand/serialization-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +inline std::string_view trim(const std::string_view str) noexcept { + // We can almost surely do better by rolling our own find_first_not_of function. 
+ size_t first = str.find_first_not_of(" \t\n\r"); + // If we have the empty string (just white space), then no trimming is possible, and + // we return the empty string_view. + if (std::string_view::npos == first) { return std::string_view(); } + size_t last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, (last - first + 1)); +} + + +inline simdjson_result to_json_string(lasx::ondemand::document& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(lasx::ondemand::document_reference& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(lasx::ondemand::value& x) noexcept { + /** + * If we somehow receive a value that has already been consumed, + * then the following code could be in trouble. E.g., we create + * an array as needed, but if an array was already created, then + * it could be bad. 
+ */ + using namespace lasx::ondemand; + lasx::ondemand::json_type t; + auto error = x.type().get(t); + if(error != SUCCESS) { return error; } + switch (t) + { + case json_type::array: + { + lasx::ondemand::array array; + error = x.get_array().get(array); + if(error) { return error; } + return to_json_string(array); + } + case json_type::object: + { + lasx::ondemand::object object; + error = x.get_object().get(object); + if(error) { return error; } + return to_json_string(object); + } + default: + return trim(x.raw_json_token()); + } +} + +inline simdjson_result to_json_string(lasx::ondemand::object& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(lasx::ondemand::array& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} +} // namespace simdjson + +namespace simdjson { namespace lasx { namespace ondemand { + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + 
throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::document_reference& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { 
throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif +}}} // namespace simdjson::lasx::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +/* end file simdjson/generic/ondemand/serialization-inl.h for lasx */ +/* including simdjson/generic/ondemand/token_iterator-inl.h for lasx: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/token_iterator-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +simdjson_inline token_iterator::token_iterator( + const uint8_t *_buf, + token_position position +) noexcept : buf{_buf}, _position{position} +{ +} + +simdjson_inline uint32_t token_iterator::current_offset() const noexcept { + return *(_position); +} + + +simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { + return &buf[*(_position++)]; +} + +simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; +} +simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; +} +simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { + return *(position+1) - *position; +} + +simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { + return *(position+2) - *(position) > *(position+1) - *(position) ? 
+ *(position+1) - *(position) + : *(position+2) - *(position); +} +simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(_position+delta)]; +} +simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(_position+delta); +} +simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + return *(_position+delta+1) - *(_position+delta); +} + +simdjson_inline token_position token_iterator::position() const noexcept { + return _position; +} +simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; +} + +simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return _position == other._position; +} +simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return _position != other._position; +} +simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return _position > other._position; +} +simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return _position >= other._position; +} +simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return _position < other._position; +} +simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return _position <= other._position; +} + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H +/* end file 
simdjson/generic/ondemand/token_iterator-inl.h for lasx */ +/* including simdjson/generic/ondemand/value-inl.h for lasx: #include "simdjson/generic/ondemand/value-inl.h" */ +/* begin file simdjson/generic/ondemand/value-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_path_to_pointer_conversion-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +simdjson_inline value::value(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} +simdjson_inline value value::start(const value_iterator &iter) noexcept { + return iter; +} +simdjson_inline value value::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline simdjson_result value::get_array() noexcept { + return array::start(iter); +} +simdjson_inline simdjson_result value::get_object() 
noexcept { + return object::start(iter); +} +simdjson_inline simdjson_result value::start_or_resume_object() noexcept { + if (iter.at_start()) { + return get_object(); + } else { + return object::resume(iter); + } +} + +simdjson_inline simdjson_result value::get_raw_json_string() noexcept { + return iter.get_raw_json_string(); +} +simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { + return iter.get_string(allow_replacement); +} +template +simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { + return iter.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result value::get_wobbly_string() noexcept { + return iter.get_wobbly_string(); +} +simdjson_inline simdjson_result value::get_double() noexcept { + return iter.get_double(); +} +simdjson_inline simdjson_result value::get_double_in_string() noexcept { + return iter.get_double_in_string(); +} +simdjson_inline simdjson_result value::get_uint64() noexcept { + return iter.get_uint64(); +} +simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { + return iter.get_uint64_in_string(); +} +simdjson_inline simdjson_result value::get_int64() noexcept { + return iter.get_int64(); +} +simdjson_inline simdjson_result value::get_int64_in_string() noexcept { + return iter.get_int64_in_string(); +} +simdjson_inline simdjson_result value::get_bool() noexcept { + return iter.get_bool(); +} +simdjson_inline simdjson_result value::is_null() noexcept { + return iter.is_null(); +} +template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result value::get() noexcept { 
return get_number(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } + +template simdjson_inline error_code value::get(T &out) noexcept { + return get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline value::operator T() noexcept(false) { + return get(); +} +simdjson_inline value::operator array() noexcept(false) { + return get_array(); +} +simdjson_inline value::operator object() noexcept(false) { + return get_object(); +} +simdjson_inline value::operator uint64_t() noexcept(false) { + return get_uint64(); +} +simdjson_inline value::operator int64_t() noexcept(false) { + return get_int64(); +} +simdjson_inline value::operator double() noexcept(false) { + return get_double(); +} +simdjson_inline value::operator std::string_view() noexcept(false) { + return get_string(false); +} +simdjson_inline value::operator raw_json_string() noexcept(false) { + return get_raw_json_string(); +} +simdjson_inline value::operator bool() noexcept(false) { + return get_bool(); +} +#endif + +simdjson_inline simdjson_result value::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result value::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result value::count_elements() & noexcept { + simdjson_result answer; + auto a = get_array(); + answer = a.count_elements(); + // count_elements leaves you pointing inside the array, at the first element. + // We need to move back so that the user can create a new array (which requires that + // we point at '['). 
+ iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::count_fields() & noexcept { + simdjson_result answer; + auto a = get_object(); + answer = a.count_fields(); + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::at(size_t index) noexcept { + auto a = get_array(); + return a.at(index); +} + +simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result value::find_field(const char *key) noexcept { + return start_or_resume_object().find_field(key); +} + +simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} + +simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result value::operator[](const char *key) noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline simdjson_result value::type() noexcept { + return iter.type(); +} + +simdjson_inline simdjson_result value::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! 
((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline simdjson_result value::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + + +simdjson_inline bool value::is_negative() noexcept { + return iter.is_negative(); +} + +simdjson_inline simdjson_result value::is_integer() noexcept { + return iter.is_integer(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { + return iter.get_number_type(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { + return iter.get_number(); +} + +simdjson_inline std::string_view value::raw_json_token() noexcept { + return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); +} + +simdjson_inline simdjson_result value::raw_json() noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: { + ondemand::array array; + SIMDJSON_TRY(get_array().get(array)); + return array.raw_json(); + } + case json_type::object: { + ondemand::object object; + SIMDJSON_TRY(get_object().get(object)); + return object.raw_json(); + } + default: + return raw_json_token(); + } +} + +simdjson_inline simdjson_result value::current_location() noexcept { + return iter.json_iter().current_location(); +} + +simdjson_inline int32_t value::current_depth() const noexcept{ + return iter.json_iter().depth(); +} + +inline bool is_pointer_well_formed(std::string_view json_pointer) noexcept { + if (simdjson_unlikely(json_pointer.empty())) { // can't be + return false; + } + if (simdjson_unlikely(json_pointer[0] != '/')) { + return false; + } + size_t escape = json_pointer.find('~'); + if (escape == std::string_view::npos) { + return true; + } + if (escape == json_pointer.size() - 1) { + return false; + } + if (json_pointer[escape + 1] != '0' && json_pointer[escape + 1] != '1') { + 
return false; + } + return true; +} + +simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + // a non-empty string can be invalid, or accessing a primitive (issue 2154) + if (is_pointer_well_formed(json_pointer)) { + return NO_SUCH_FIELD; + } + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result value::at_path(std::string_view json_path) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + lasx::ondemand::value &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + if (error()) { return error(); } + return {}; 
+} + +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} + +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { + if (error()) { return error(); } + return first[key]; +} + +simdjson_inline simdjson_result simdjson_result::get_array() noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return 
first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return first.get(); +} +template simdjson_inline error_code simdjson_result::get(T &out) noexcept { + if (error()) { return error(); } + return first.get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return std::move(first); +} +template<> simdjson_inline error_code simdjson_result::get(lasx::ondemand::value &out) noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } 
+ return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return static_cast(first); +} +simdjson_inline simdjson_result::operator lasx::ondemand::array() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lasx::ondemand::object() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lasx::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline 
simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer( + std::string_view json_pointer) noexcept { + if (error()) { + return error(); + } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H +/* end file simdjson/generic/ondemand/value-inl.h for lasx */ +/* including simdjson/generic/ondemand/value_iterator-inl.h for lasx: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/value_iterator-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +simdjson_inline value_iterator::value_iterator( + json_iterator *json_iter, + depth_t depth, + token_position start_position +) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} +{ +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_root_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { + assert_at_container_start(); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + if (*_json_iter->peek() == '}') { + logger::log_value(*_json_iter, "empty object"); + _json_iter->return_current_and_advance(); + end_container(); + return false; + } + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! 
_json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should + // call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != '}') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); + } + // If the last character is } *and* the first gibberish character is also '}' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed object. 
+ return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { + auto error = check_root_object(); + if(error) { return error; } + return started_object(); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { +#if SIMDJSON_CHECK_EOF + if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } + // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + _json_iter->ascend_to(depth()-1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { + assert_at_next(); + + // It's illegal to call this unless there are more tokens: anything that ends in } or ] is + // obligated to verify there are more tokens if they are not the top level. + switch (*_json_iter->return_current_and_advance()) { + case '}': + logger::log_end_value(*_json_iter, "object"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between object fields"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { + error_code error; + bool has_value; + // + // Initially, the object can be in one of a few different places: + // + // 1. The start of the object, at the first field: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + if (at_first_field()) { + has_value = true; + + // + // 2. 
When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + return false; + + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + if ((error = skip_child() )) { abandon(); return error; } + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + while (has_value) { + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. 
+ if ((error = field_value() )) { abandon(); return error; } + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + //if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); // Skip the value entirely + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + + // If the loop ended, we're out of fields to look at. 
+ return false; +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { + /** + * When find_field_unordered_raw is called, we can either be pointing at the + * first key, pointing outside (at the closing brace) or if a key was matched + * we can be either pointing right afterthe ':' right before the value (that we need skip), + * or we may have consumed the value and we might be at a comma or at the + * final brace (ready for a call to has_next_field()). + */ + error_code error; + bool has_value; + + // First, we scan from that point to the end. + // If we don't find a match, we may loop back around, and scan from the beginning to that point. + token_position search_start = _json_iter->position(); + + // We want to know whether we need to go back to the beginning. + bool at_first = at_first_field(); + /////////////// + // Initially, the object can be in one of a few different places: + // + // 1. At the first key: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + // + if (at_first) { + has_value = true; + + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { + +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + SIMDJSON_TRY(reset_object().get(has_value)); + at_first = true; + // 3. 
When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + // If someone queried a key but they not did access the value, then we are left pointing + // at the ':' and we need to move forward through the value... If the value was + // processed then skip_child() does not move the iterator (but may adjust the depth). + if ((error = skip_child() )) { abandon(); return error; } + search_start = _json_iter->position(); + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + + // After initial processing, we will be in one of two states: + // + // ``` + // // At the beginning of a field + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // At the end of the object + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // ``` + // + // Next, we find a match starting from the current position. + while (has_value) { + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). 
+ if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + // Performance note: it maybe wasteful to rewind to the beginning when there might be + // no other query following. Indeed, it would require reskipping the whole object. + // Instead, you can just stay where you are. If there is a new query, there is always time + // to rewind. + if(at_first) { return false; } + + // If we reach the end without finding a match, search the rest of the fields starting at the + // beginning of the object. 
+ // (We have already run through the object before, so we've already validated its structure. We + // don't check errors in this bit.) + SIMDJSON_TRY(reset_object().get(has_value)); + while (true) { + SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + error = field_value(); SIMDJSON_ASSUME(!error); + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). 
+ SIMDJSON_TRY( skip_child() ); + // If we reached the end of the key-value pair we started from, then we know + // that the key is not there so we return false. We are either right before + // the next comma or the final brace. + if(_json_iter->position() == search_start) { return false; } + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); + // If we make the mistake of exiting here, then we could be left pointing at a key + // in the middle of an object. That's not an allowable state. + } + // If the loop ended, we're out of fields to look at. The program should + // never reach this point. + return false; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { + assert_at_next(); + + const uint8_t *key = _json_iter->return_current_and_advance(); + if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } + return raw_json_string(key); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { + assert_at_next(); + + if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } + _json_iter->descend_to(depth()+1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_root_array(); +} + +inline std::string value_iterator::to_string() const noexcept { + auto answer = 
std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); + if(_json_iter != nullptr) { answer += _json_iter->to_string(); } + answer += std::string(" ]"); + return answer; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { + assert_at_container_start(); + if (*_json_iter->peek() == ']') { + logger::log_value(*_json_iter, "empty array"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY( end_container() ); + return false; + } + _json_iter->descend_to(depth()+1); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should + // also call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != ']') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); + } + // If the last character is ] *and* the first gibberish character is also ']' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. 
+ if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed array. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { + auto error = check_root_array(); + if (error) { return error; } + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { + assert_at_next(); + + logger::log_event(*this, "has_next_element"); + switch (*_json_iter->return_current_and_advance()) { + case ']': + logger::log_end_value(*_json_iter, "array"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + _json_iter->descend_to(depth()+1); + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between array elements"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { + auto not_true = atomparsing::str4ncmp(json, "true"); + auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); + bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 
5 : 4]); + if (error) { return incorrect_type_error("Not a boolean"); } + return simdjson_result(!not_true); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { + bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); + // if we start with 'n', we must be a null + if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } + return is_null_string; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { + return get_raw_json_string().unescape(json_iter(), allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_string(allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { + return get_raw_json_string().unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { + auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { + auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return 
result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { + auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { + auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { + auto result = numberparsing::parse_double(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { + auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { + auto result = parse_bool(peek_non_root_scalar("bool")); + if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } + return result; +} +simdjson_inline simdjson_result value_iterator::is_null() noexcept { + bool is_null_value; + SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); + if(is_null_value) { advance_non_root_scalar("null"); } + return is_null_value; +} +simdjson_inline bool value_iterator::is_negative() noexcept { + return numberparsing::is_negative(peek_non_root_scalar("numbersign")); +} +simdjson_inline bool value_iterator::is_root_negative() noexcept { + return numberparsing::is_negative(peek_root_scalar("numbersign")); +} +simdjson_inline simdjson_result value_iterator::is_integer() noexcept { + return 
numberparsing::is_integer(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { + return numberparsing::get_number_type(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number() noexcept { + number num; + error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); + if(error) { return error; } + return num; +} + +simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("is_root_integer"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + return false; // if there are more than 20 characters, it cannot be represented as an integer. + } + auto answer = numberparsing::is_integer(tmpbuf); + // If the parsing was a success, we must still check that it is + // a single scalar. Note that we parse first because of cases like '[]' where + // getting TRAILING_CONTENT is wrong. + if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } + return answer; +} + +simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return number_type::big_integer; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; + } + auto answer = numberparsing::get_number_type(tmpbuf); + if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + return answer; +} +simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return BIGINT_ERROR; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; + } + number num; + error_code error = numberparsing::parse_number(tmpbuf, num); + if(error) { return error; } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("number"); + return num; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { + return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_root_string(check_trailing, allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { + return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) 
noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("bool"); + uint8_t tmpbuf[5+1+1]; // +1 for null termination + tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } + auto result = parse_bool(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + } + return result; +} +simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("null"); + bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + if(result) { // we have something that looks like a null. + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("null"); + } + return result; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); + + return _json_iter->skip_child(depth()); +} + +simdjson_inline value_iterator value_iterator::child() const noexcept { + assert_at_child(); + return { _json_iter, depth()+1, _json_iter->token.position() }; +} + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is +// marked non-inline. 
+SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline bool value_iterator::is_open() const noexcept { + return _json_iter->depth() >= depth(); +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool value_iterator::at_end() const noexcept { + return _json_iter->at_end(); +} + +simdjson_inline bool value_iterator::at_start() const noexcept { + return _json_iter->token.position() == start_position(); +} + +simdjson_inline bool value_iterator::at_first_field() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + return _json_iter->token.position() == start_position() + 1; +} + +simdjson_inline void value_iterator::abandon() noexcept { + _json_iter->abandon(); +} + +simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { + return _depth; +} +simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { + return _json_iter->error; +} +simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { + return _json_iter->string_buf_loc(); +} +simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { + return *_json_iter; +} +simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { + return *_json_iter; +} + +simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { + return _json_iter->peek(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { + return _json_iter->peek_length(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { + return _json_iter->peek_root_length(start_position()); +} + +simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. 
+ if (!is_at_start()) { return peek_start(); } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return; } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { + logger::log_start_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + const uint8_t *json; + if (!is_at_start()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + json = peek_start(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + } else { + assert_at_start(); + /** + * We should be prudent. Let us peek. If it is not the right type, we + * return an error. Only once we have determined that we have the right + * type are we allowed to advance! 
+ */ + json = _json_iter->peek(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + _json_iter->return_current_and_advance(); + } + + + return SUCCESS; +} + + +simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_root(); + return _json_iter->peek(); +} +simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_non_root_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_root(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} +simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_non_root_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { + logger::log_error(*_json_iter, start_position(), depth(), message); + return INCORRECT_TYPE; +} + +simdjson_inline bool value_iterator::is_at_start() const noexcept { + return position() == start_position(); +} + +simdjson_inline bool value_iterator::is_at_key() const noexcept { + // Keys are at the same depth as the object. + // Note here that we could be safer and check that we are within an object, + // but we do not. 
+ return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; +} + +simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { + // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). + auto delta = position() - start_position(); + return delta == 1 || delta == 2; +} + +inline void value_iterator::assert_at_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_container_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_next() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +simdjson_inline void value_iterator::move_at_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position); +} + +simdjson_inline void value_iterator::move_at_container_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position + 1); +} + +simdjson_inline simdjson_result value_iterator::reset_array() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_array(); +} + +simdjson_inline simdjson_result value_iterator::reset_object() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_object(); +} + +inline void value_iterator::assert_at_child() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_root() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( 
_depth == 1 ); +} + +inline void value_iterator::assert_at_non_root_start() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth > 1 ); +} + +inline void value_iterator::assert_is_valid() const noexcept { + SIMDJSON_ASSUME( _json_iter != nullptr ); +} + +simdjson_inline bool value_iterator::is_valid() const noexcept { + return _json_iter != nullptr; +} + +simdjson_inline simdjson_result value_iterator::type() const noexcept { + switch (*peek_start()) { + case '{': + return json_type::object; + case '[': + return json_type::array; + case '"': + return json_type::string; + case 'n': + return json_type::null; + case 't': case 'f': + return json_type::boolean; + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return json_type::number; + default: + return TAPE_ERROR; + } +} + +simdjson_inline token_position value_iterator::start_position() const noexcept { + return _start_position; +} + +simdjson_inline token_position value_iterator::position() const noexcept { + return _json_iter->position(); +} + +simdjson_inline token_position value_iterator::end_position() const noexcept { + return _json_iter->end_position(); +} + +simdjson_inline token_position value_iterator::last_position() const noexcept { + return _json_iter->last_position(); +} + +simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { + return _json_iter->report_error(error, message); +} + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +/* end file 
simdjson/generic/ondemand/value_iterator-inl.h for lasx */ +/* end file simdjson/generic/ondemand/amalgamated.h for lasx */ +/* including simdjson/lasx/end.h: #include "simdjson/lasx/end.h" */ +/* begin file simdjson/lasx/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "lasx" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/lasx/end.h */ + +#endif // SIMDJSON_LASX_ONDEMAND_H +/* end file simdjson/lasx/ondemand.h */ +#else +#error Unknown SIMDJSON_BUILTIN_IMPLEMENTATION +#endif + +/* undefining SIMDJSON_CONDITIONAL_INCLUDE */ +#undef SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { + /** + * @copydoc simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand + */ + namespace ondemand = SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand; +} // namespace simdjson + +#endif // SIMDJSON_BUILTIN_ONDEMAND_H +/* end file simdjson/builtin/ondemand.h */ + +namespace simdjson { + /** + * @copydoc simdjson::builtin::ondemand + */ + namespace ondemand = builtin::ondemand; +} // namespace simdjson + +#endif // SIMDJSON_ONDEMAND_H +/* end file simdjson/ondemand.h */ + +#endif // SIMDJSON_H +/* end file simdjson.h */ From 90bca751b9b3d0acb1268de824ac73d3e9c3f0e6 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Mon, 29 Jul 2024 02:42:11 +0200 Subject: [PATCH 177/216] Added load_fxt and unload_fxt opcodes (#170) --- CHANGELOG.md | 2 + cleo_plugins/Text/CTextManager.cpp | 33 ++++++++----- cleo_plugins/Text/CTextManager.h | 2 +- cleo_plugins/Text/Text.cpp | 44 +++++++++++++++++- tests/cleo_tests/Text/2606.txt | 56 ++++++++++++++++++++++ tests/cleo_tests/Text/2607.txt | 74 ++++++++++++++++++++++++++++++ tests/cleo_tests/Text/Test.fxt | 3 ++ 7 files changed, 201 insertions(+), 13 deletions(-) create mode 
100644 tests/cleo_tests/Text/2606.txt create mode 100644 tests/cleo_tests/Text/2607.txt create mode 100644 tests/cleo_tests/Text/Test.fxt diff --git a/CHANGELOG.md b/CHANGELOG.md index 22bcf066..18a0587f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -65,6 +65,8 @@ - new opcode **2603 ([is_text_prefix](https://library.sannybuilder.com/#/sa/text/2603))** - new opcode **2604 ([is_text_suffix](https://library.sannybuilder.com/#/sa/text/2604))** - new opcode **2605 ([display_text_formatted](https://library.sannybuilder.com/#/sa/text/2605))** + - new opcode **2606 ([load_fxt](https://library.sannybuilder.com/#/sa/text/2606))** + - new opcode **2607 ([unload_fxt](https://library.sannybuilder.com/#/sa/text/2607))** - new and updated opcodes - implemented support for **memory pointer string** arguments for all game's native opcodes - **0B1E ([sign_extend](https://library.sannybuilder.com/#/sa/bitwise/0B1E))** diff --git a/cleo_plugins/Text/CTextManager.cpp b/cleo_plugins/Text/CTextManager.cpp index 06117f9d..57211694 100644 --- a/cleo_plugins/Text/CTextManager.cpp +++ b/cleo_plugins/Text/CTextManager.cpp @@ -25,14 +25,13 @@ namespace CLEO bool CTextManager::AddFxt(const char *key, const char *value, bool dynamic) { - // TODO: replace this part with in-place construction of FxtEntry, - // when it will be implemented in libstdc++ - // ^^ me iz noob and doesnt can use GNU - auto fxt = fxts.find(key); if (fxt != fxts.end()) { + if (fxt->second->text.compare(value) == 0) + return true; // already present + if (!dynamic || fxt->second->is_static) { LOG_WARNING(0, "Attempting to add FXT \'%s\' - FAILED (GXT conflict)", key, value); @@ -47,6 +46,7 @@ namespace CLEO std::transform(str.begin(), str.end(), str.begin(), ::toupper); fxts[str.c_str()] = new FxtEntry(value, !dynamic); } + return true; } @@ -122,13 +122,13 @@ namespace CLEO { } - size_t CTextManager::ParseFxtFile(std::istream& stream) + size_t CTextManager::ParseFxtFile(std::istream& stream, bool dynamic, bool remove) { 
static char buf[0x100]; char *key_iterator, *value_iterator, *value_start, *key_start; stream.exceptions(std::ios::badbit); - size_t addedCount = 0; + size_t keyCount = 0; while (true) { if (stream.eof()) break; @@ -142,11 +142,22 @@ namespace CLEO { if (*key_iterator == '#') // start of comment break; - if (*key_iterator == '/' && key_iterator[1] == '/') + if (*key_iterator == '/' && key_iterator[1] == '/') // comment break; + if (isspace(*key_iterator)) { - *key_iterator = '\0'; + *key_iterator = '\0'; // terminate key string + + if (remove) + { + if (RemoveFxt(key_start)) + { + keyCount++; + } + break; + } + // while (isspace(*++key_iterator)) ; // skip leading spaces value_start = value_iterator = key_iterator + 1; while (*value_iterator) @@ -162,9 +173,9 @@ namespace CLEO } // register found fxt entry - if (AddFxt(key_start, value_start, false)) + if (AddFxt(key_start, value_start, dynamic)) { - addedCount++; + keyCount++; } break; @@ -173,6 +184,6 @@ namespace CLEO } } - return addedCount; + return keyCount; } } diff --git a/cleo_plugins/Text/CTextManager.h b/cleo_plugins/Text/CTextManager.h index 5aa76e91..2668e62e 100644 --- a/cleo_plugins/Text/CTextManager.h +++ b/cleo_plugins/Text/CTextManager.h @@ -35,6 +35,6 @@ namespace CLEO const char *LocateFxt(const char *key); // erase all fxts, added by scripts void ClearDynamicFxts(); - size_t ParseFxtFile(std::istream& stream); + size_t ParseFxtFile(std::istream& stream, bool dynamic = false, bool remove = false); }; } diff --git a/cleo_plugins/Text/Text.cpp b/cleo_plugins/Text/Text.cpp index 8614efe4..deada1b5 100644 --- a/cleo_plugins/Text/Text.cpp +++ b/cleo_plugins/Text/Text.cpp @@ -59,6 +59,8 @@ class Text CLEO_RegisterOpcode(0x2603, opcode_2603); // is_text_prefix CLEO_RegisterOpcode(0x2604, opcode_2604); // is_text_sufix CLEO_RegisterOpcode(0x2605, opcode_2605); // display_text_formatted + CLEO_RegisterOpcode(0x2606, opcode_2606); // load_fxt + CLEO_RegisterOpcode(0x2607, opcode_2607); // unload_fxt // 
register event callbacks CLEO_RegisterCallback(eCallbackId::GameBegin, OnGameBegin); @@ -427,7 +429,7 @@ class Text OPCODE_READ_PARAM_STRING_FORMATTED(text); // new GXT label - // includes unprintable character, to ensure there will be no collision with user GXT lables + // includes unprintable character, to ensure there will be no collision with user GXT labels char gxt[8] = { 0x01, 'C', 'L', 'E', 'O', '_', 0x01, 0x00 }; gxt[6] += CTheScripts::NumberOfIntroTextLinesThisFrame; // unique label for each possible entry @@ -442,6 +444,46 @@ class Text return OR_CONTINUE; } + + //2606=1, load_fxt %1d% + static OpcodeResult __stdcall opcode_2606(CLEO::CRunningScript* thread) + { + OPCODE_READ_PARAM_FILEPATH(filename); + + size_t added = 0; + try + { + std::ifstream stream(filename); + added = textManager.ParseFxtFile(stream, true, false); + } + catch (std::exception& ex) + { + LOG_WARNING(0, "Loading of FXT file '%s' failed: \n%s", filename, ex.what()); + } + + OPCODE_CONDITION_RESULT(added != 0); + return OR_CONTINUE; + } + + //2607=1, unload_fxt %1d% + static OpcodeResult __stdcall opcode_2607(CLEO::CRunningScript* thread) + { + OPCODE_READ_PARAM_FILEPATH(filename); + + size_t removed = 0; + try + { + std::ifstream stream(filename); + removed = textManager.ParseFxtFile(stream, true, true); + } + catch (std::exception& ex) + { + LOG_WARNING(0, "Unloading of FXT file '%s' failed: \n%s", filename, ex.what()); + } + + OPCODE_CONDITION_RESULT(removed != 0); + return OR_CONTINUE; + } } textInstance; CTextManager Text::textManager; diff --git a/tests/cleo_tests/Text/2606.txt b/tests/cleo_tests/Text/2606.txt new file mode 100644 index 00000000..eb85b7fa --- /dev/null +++ b/tests/cleo_tests/Text/2606.txt @@ -0,0 +1,56 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name '2606' +test("2606 (load_fxt)", tests) +terminate_this_custom_script + +function tests + it("GXTs should not exists yet", test1) + it("should load new GXTs", test2) + return + + function test1 + int 
ptr + + ptr = get_text_label_string {key} 'CLEO_T1' + assert_ptr(ptr) + assert_eqs(ptr, "") + + ptr = get_text_label_string {key} 'CLEO_T2' + assert_ptr(ptr) + assert_eqs(ptr, "") + + ptr = get_text_label_string {key} 'CLEO_T3' + assert_ptr(ptr) + assert_eqs(ptr, "") + end + + function test2 + 2606: load_fxt {filepath} "cleo\cleo_tests\text\non existing file.fxt" + assert_result_false() + + 2606: load_fxt {filepath} "cleo\cleo_tests\text\test.fxt" + assert_result_true() + + int ptr + + ptr = get_text_label_string {key} 'CLEO_T1' + assert_ptr(ptr) + assert_eqs(ptr, "Test one") + + ptr = get_text_label_string {key} 'CLEO_T2' + assert_ptr(ptr) + assert_eqs(ptr, "Test two") + + ptr = get_text_label_string {key} 'CLEO_T3' + assert_ptr(ptr) + assert_eqs(ptr, "Test three") + + // load again + 2606: load_fxt {filepath} "cleo\cleo_tests\text\test.fxt" + assert_result_true() + + unload_fxt {filepath} "cleo\cleo_tests\text\test.fxt" + end +end diff --git a/tests/cleo_tests/Text/2607.txt b/tests/cleo_tests/Text/2607.txt new file mode 100644 index 00000000..1433cf0d --- /dev/null +++ b/tests/cleo_tests/Text/2607.txt @@ -0,0 +1,74 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name '2607' +test("2607 (unload_fxt)", tests) +terminate_this_custom_script + +function tests + it("GXTs should not exists yet", test1) + it("should load new GXTs", test2) + it("should unload GXTs", test3) + return + + function test1 + int ptr + + ptr = get_text_label_string {key} 'CLEO_T1' + assert_ptr(ptr) + assert_eqs(ptr, "") + + ptr = get_text_label_string {key} 'CLEO_T2' + assert_ptr(ptr) + assert_eqs(ptr, "") + + ptr = get_text_label_string {key} 'CLEO_T3' + assert_ptr(ptr) + assert_eqs(ptr, "") + end + + function test2 + load_fxt {filepath} "cleo\cleo_tests\text\test.fxt" + assert_result_true() + + int ptr + + ptr = get_text_label_string {key} 'CLEO_T1' + assert_ptr(ptr) + assert_eqs(ptr, "Test one") + + ptr = get_text_label_string {key} 'CLEO_T2' + assert_ptr(ptr) + assert_eqs(ptr, 
"Test two") + + ptr = get_text_label_string {key} 'CLEO_T3' + assert_ptr(ptr) + assert_eqs(ptr, "Test three") + + // load again + load_fxt {filepath} "cleo\cleo_tests\text\test.fxt" + assert_result_true() + end + + function test3 + 2607: unload_fxt {filepath} "cleo\cleo_tests\text\non existing file.fxt" + assert_result_false() + + 2607: unload_fxt {filepath} "cleo\cleo_tests\text\test.fxt" + assert_result_true() + + int ptr + + ptr = get_text_label_string {key} 'CLEO_T1' + assert_ptr(ptr) + assert_eqs(ptr, "") + + ptr = get_text_label_string {key} 'CLEO_T2' + assert_ptr(ptr) + assert_eqs(ptr, "") + + ptr = get_text_label_string {key} 'CLEO_T3' + assert_ptr(ptr) + assert_eqs(ptr, "") + end +end diff --git a/tests/cleo_tests/Text/Test.fxt b/tests/cleo_tests/Text/Test.fxt new file mode 100644 index 00000000..82d1ecc1 --- /dev/null +++ b/tests/cleo_tests/Text/Test.fxt @@ -0,0 +1,3 @@ +CLEO_T1 Test one +CLEO_T2 Test two +CLEO_T3 Test three \ No newline at end of file From 7cd0afdded1deeaa4a14b69a01e06cd98c3d85df Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Tue, 30 Jul 2024 01:22:08 +0200 Subject: [PATCH 178/216] Fixed spawn_vehicle_by_cheating opcode not working with boats. 
(#171) --- source/CCustomOpcodeSystem.cpp | 15 +++++++-------- source/stdafx.h | 15 --------------- 2 files changed, 7 insertions(+), 23 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index ca08a3ca..b6d899db 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -5,6 +5,7 @@ #include "ScmFunction.h" #include "CCheat.h" #include "CModelInfo.h" +#include "CVehicle.h" #include #include @@ -1339,15 +1340,13 @@ namespace CLEO { auto modelIndex = OPCODE_READ_PARAM_INT(); - CVehicleModelInfo* model; - // if 1.0 US, prefer GetModelInfo function — makes it compatible with fastman92's limit adjuster - if (CLEO::GetInstance().VersionManager.GetGameVersion() == CLEO::GV_US10) { - model = plugin::CallAndReturn(modelIndex); - } - else { - model = reinterpret_cast(CModelInfo::ms_modelInfoPtrs[modelIndex]); + auto model = (CVehicleModelInfo*)CModelInfo::GetModelInfo(modelIndex); // compatible with fastman92's limit adjuster + + if (model->m_nVehicleType != -1 && model->m_nVehicleType != eVehicleType::VEHICLE_TRAIN) + { + SpawnCar(modelIndex); } - if (model->m_nVehicleType != VEHICLE_TYPE_TRAIN && model->m_nVehicleType != VEHICLE_TYPE_UNKNOWN) SpawnCar(modelIndex); + return OR_CONTINUE; } diff --git a/source/stdafx.h b/source/stdafx.h index 8a52a3a0..96603147 100644 --- a/source/stdafx.h +++ b/source/stdafx.h @@ -93,21 +93,6 @@ class CTextDrawer VALIDATE_SIZE(CTextDrawer, 0x44); -enum -{ - VEHICLE_TYPE_AUTOMOBILE, - VEHICLE_TYPE_PLANE, - VEHICLE_TYPE_BIKE, - VEHICLE_TYPE_HELI, - VEHICLE_TYPE_BOAT, - VEHICLE_TYPE_TRAIN, - VEHICLE_TYPE_TRAILER, - VEHICLE_TYPE_BMX, - VEHICLE_TYPE_MTRUCK, - VEHICLE_TYPE_QUAD, - VEHICLE_TYPE_UNKNOWN = -1, -}; - inline CEntity* GetWeaponTarget(CPed* pSelf) { return reinterpret_cast(pSelf->m_pTargetedObject) != -1 ? 
pSelf->m_pTargetedObject : nullptr; From 0a0b18c7dae78ee577478755a1a12860a059d270 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Tue, 30 Jul 2024 22:46:17 +0200 Subject: [PATCH 179/216] Keep default volume for audio streams in legacy scripts (#173) Keep legacy volume for audio streams in legacy scripts Added example scripts directory --- .github/workflows/main.yml | 2 + cleo_plugins/Audio/Audio.cpp | 10 + cleo_plugins/Audio/CAudioStream.cpp | 1 - cleo_plugins/Audio/CAudioStream.h | 2 +- cleo_plugins/Audio/CSoundSystem.cpp | 4 +- cleo_plugins/Audio/CSoundSystem.h | 3 +- cleo_plugins/Audio/SA.Audio.ini | 8 +- examples/Audio_Demo.txt | 310 ++++++++++++++++++++++++++++ 8 files changed, 331 insertions(+), 9 deletions(-) create mode 100644 examples/Audio_Demo.txt diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 4f89b041..fbc11198 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -81,6 +81,7 @@ jobs: mkdir .output\Release\cleo\cleo_saves mkdir .output\Release\cleo\cleo_text mkdir .output\Release\cleo_readme + mkdir .output\Release\cleo_readme\examples @REM copy files copy source\cleo_config.ini .output\Release\cleo\.cleo_config.ini @@ -88,6 +89,7 @@ jobs: copy cleo_plugins\.output\*.ini .output\Release\cleo\cleo_plugins copy cleo_plugins\Audio\bass\bass.dll .output\Release\bass.dll xcopy /E /I tests .output\Release\cleo + xcopy /E /I examples .output\Release\cleo_readme\examples @REM copy SDK copy .output\Release\CLEO.lib cleo_sdk\CLEO.lib diff --git a/cleo_plugins/Audio/Audio.cpp b/cleo_plugins/Audio/Audio.cpp index 873b18ec..0da56351 100644 --- a/cleo_plugins/Audio/Audio.cpp +++ b/cleo_plugins/Audio/Audio.cpp @@ -112,6 +112,11 @@ class Audio auto ptr = soundSystem.CreateStream(path); + if (ptr != nullptr && IsLegacyScript(thread)) + { + ptr->SetType(CLEO::CSoundSystem::LegacyModeDefaultStreamType); + } + OPCODE_WRITE_PARAM_PTR(ptr); OPCODE_CONDITION_RESULT(ptr != nullptr); return OR_CONTINUE; @@ -217,6 +222,11 @@ class 
Audio auto ptr = soundSystem.CreateStream(path, true); + if (ptr != nullptr && IsLegacyScript(thread)) + { + ptr->SetType(CLEO::CSoundSystem::LegacyModeDefaultStreamType); + } + OPCODE_WRITE_PARAM_PTR(ptr); OPCODE_CONDITION_RESULT(ptr != nullptr); return OR_CONTINUE; diff --git a/cleo_plugins/Audio/CAudioStream.cpp b/cleo_plugins/Audio/CAudioStream.cpp index b539c2c1..9be1eea5 100644 --- a/cleo_plugins/Audio/CAudioStream.cpp +++ b/cleo_plugins/Audio/CAudioStream.cpp @@ -22,7 +22,6 @@ CAudioStream::CAudioStream(const char* filepath) return; } - SetType(CSoundSystem::defaultStreamType); BASS_ChannelGetAttribute(streamInternal, BASS_ATTRIB_FREQ, &rate); ok = true; } diff --git a/cleo_plugins/Audio/CAudioStream.h b/cleo_plugins/Audio/CAudioStream.h index 8844018a..b68a3a95 100644 --- a/cleo_plugins/Audio/CAudioStream.h +++ b/cleo_plugins/Audio/CAudioStream.h @@ -55,7 +55,7 @@ namespace CLEO protected: HSTREAM streamInternal = 0; eStreamState state = Paused; - eStreamType type = None; + eStreamType type = eStreamType::SoundEffect; bool ok = false; float rate = 44100.0f; // file's sampling rate double speed = 1.0f; diff --git a/cleo_plugins/Audio/CSoundSystem.cpp b/cleo_plugins/Audio/CSoundSystem.cpp index 3166f55a..b5d148af 100644 --- a/cleo_plugins/Audio/CSoundSystem.cpp +++ b/cleo_plugins/Audio/CSoundSystem.cpp @@ -13,7 +13,7 @@ namespace CLEO BASS_3DVECTOR CSoundSystem::vel(0.0, 0.0, 0.0); BASS_3DVECTOR CSoundSystem::front(0.0, -1.0, 0.0); BASS_3DVECTOR CSoundSystem::top(0.0, 0.0, 1.0); - eStreamType CSoundSystem::defaultStreamType = eStreamType::SoundEffect; + eStreamType CSoundSystem::LegacyModeDefaultStreamType = eStreamType::None; float CSoundSystem::masterSpeed = 1.0f; float CSoundSystem::masterVolumeSfx = 1.0f; float CSoundSystem::masterVolumeMusic = 1.0f; @@ -55,7 +55,7 @@ namespace CLEO if (initialized) return true; // already done auto config = GetConfigFilename(); - defaultStreamType = (eStreamType)GetPrivateProfileInt("General", "DefaultStreamType", 0, 
config.c_str()); + LegacyModeDefaultStreamType = (eStreamType)GetPrivateProfileInt("General", "LegacyModeDefaultStreamType", 0, config.c_str()); allowNetworkSources = GetPrivateProfileInt("General", "AllowNetworkSources", 1, config.c_str()) != 0; int default_device, total_devices, enabled_devices; diff --git a/cleo_plugins/Audio/CSoundSystem.h b/cleo_plugins/Audio/CSoundSystem.h index 28c93682..956141c8 100644 --- a/cleo_plugins/Audio/CSoundSystem.h +++ b/cleo_plugins/Audio/CSoundSystem.h @@ -31,12 +31,13 @@ namespace CLEO static BASS_3DVECTOR vel; static BASS_3DVECTOR front; static BASS_3DVECTOR top; - static eStreamType defaultStreamType; static float masterSpeed; // game simulation speed static float masterVolumeSfx; static float masterVolumeMusic; public: + static eStreamType LegacyModeDefaultStreamType; + CSoundSystem() = default; // TODO: give to user an ability to force a sound device to use (ini-file or cmd-line?) ~CSoundSystem(); diff --git a/cleo_plugins/Audio/SA.Audio.ini b/cleo_plugins/Audio/SA.Audio.ini index 9103f5b5..73d0bcd4 100644 --- a/cleo_plugins/Audio/SA.Audio.ini +++ b/cleo_plugins/Audio/SA.Audio.ini @@ -1,10 +1,10 @@ [General] -; Manually select audio device. Visit `.cleo.log` file to check list of available options. -1 for automatic +; Manually select audio device. See `.cleo.log` file to check list of available options. 
-1 for automatic AudioDevice=-1 ; Allow playing streams from http(s) locations AllowNetworkSources=1 -; Which game's volume settings CLEO sounds should use by default: 0 - None, 1 - SFX, 2 - Music -DefaultStreamType=1 - +; Sounds created from scripts in legacy mode (*.cs3 or *.cs4) should use by default game's volume settings: 0 - None, 1 - SFX, 2 - Music +; Select 1 if you have older mods that play sounds too loud +LegacyModeDefaultStreamType=0 diff --git a/examples/Audio_Demo.txt b/examples/Audio_Demo.txt new file mode 100644 index 00000000..b4a564dd --- /dev/null +++ b/examples/Audio_Demo.txt @@ -0,0 +1,310 @@ +// CLEO5 example script +// Sanny Builder 4 +// mode: GTA SA(v1.0 - SBL) +{$CLEO .cs} + +const Model_Speaker = 2229 +const Audio_Path = ".\cleo_tests\Audio\Ding.mp3" // this script's file relative location + +wait 1000 + +int sound_stream, speaker_object +float value, valueCurr + + +while true + wait 0 + print_formatted_now "CLEO5 Audio Plugin DEMO~n~Press key 1 - 7" {time} 0 + + if + test_cheat "1" + then + print_formatted_now "DEMO 1: Get progress" {time} 10000 + START_TEST() + + if + not load_3d_audio_stream Audio_Path {store_to} sound_stream + then + print_formatted_now "~r~Failed to load audio file!" {time} 10000 + END_TEST() + continue + end + set_play_3d_audio_stream_at_object sound_stream {object} speaker_object + set_audio_stream_state sound_stream AudioStreamAction.Play + + while is_audio_stream_playing sound_stream + valueCurr = get_audio_stream_progress sound_stream + print_formatted_now "DEMO 1: Get progress: %.2f" {time} 0 {args} valueCurr + wait 0 + end + + END_TEST() + end + + if + test_cheat "2" + then + print_formatted_now "DEMO 2: Set volume" {time} 10000 + START_TEST() + + if + not load_3d_audio_stream Audio_Path {store_to} sound_stream + then + print_formatted_now "~r~Failed to load audio file!" 
{time} 10000 + END_TEST() + continue + end + set_play_3d_audio_stream_at_object sound_stream {object} speaker_object + set_audio_stream_looped sound_stream true + set_audio_stream_state sound_stream AudioStreamAction.Play + + TIMERA = 1000 + repeat + if + TIMERA >= 1000 + then + value = generate_random_float_in_range 0.0 2.0 + set_audio_stream_volume sound_stream value + TIMERA = 0 + end + + print_formatted_now "DEMO 2: Set volume: %.2f~n~Press 2 to stop" {time} 0 {args} value + wait 0 + until test_cheat "2" + + END_TEST() + end + + if + test_cheat "3" + then + print_formatted_now "Set volume with transition" {time} 10000 + START_TEST() + + if + not load_3d_audio_stream Audio_Path {store_to} sound_stream + then + print_formatted_now "~r~Failed to load audio file!" {time} 10000 + END_TEST() + continue + end + set_play_3d_audio_stream_at_object sound_stream {object} speaker_object + set_audio_stream_looped sound_stream true + set_audio_stream_state sound_stream AudioStreamAction.Play + + TIMERA = 2000 + repeat + if + TIMERA >= 2000 + then + value = generate_random_float_in_range 0.0 2.0 + set_audio_stream_volume_with_transition sound_stream {volume} value {timeMs} 1000 + TIMERA = 0 + end + + valueCurr = get_audio_stream_volume sound_stream + print_formatted_now "DEMO 3: Set volume: %.2f, Transition: 1.0s~n~Current volume: %.2f~n~Press 3 to stop" {time} 0 {args} value valueCurr + wait 0 + until test_cheat "3" + + END_TEST() + end + + if + test_cheat "4" + then + print_formatted_now "DEMO 4: Set speed" {time} 10000 + START_TEST() + + if + not load_3d_audio_stream Audio_Path {store_to} sound_stream + then + print_formatted_now "~r~Failed to load audio file!" 
{time} 10000 + END_TEST() + continue + end + set_play_3d_audio_stream_at_object sound_stream {object} speaker_object + set_audio_stream_looped sound_stream true + set_audio_stream_state sound_stream AudioStreamAction.Play + + TIMERA = 1000 + repeat + if + TIMERA >= 1000 + then + value = generate_random_float_in_range 0.0 2.0 + set_audio_stream_speed sound_stream value + TIMERA = 0 + end + + print_formatted_now "DEMO 4: Set speed %.2f~n~Press 4 to stop" {time} 0 {args} value + wait 0 + until test_cheat "4" + + END_TEST() + end + + if + test_cheat "5" + then + print_formatted_now "DEMO 5: Set speed with transition" {time} 10000 + START_TEST() + + if + not load_3d_audio_stream Audio_Path {store_to} sound_stream + then + print_formatted_now "~r~Failed to load audio file!" {time} 10000 + END_TEST() + continue + end + set_play_3d_audio_stream_at_object sound_stream {object} speaker_object + set_audio_stream_looped sound_stream true + set_audio_stream_state sound_stream AudioStreamAction.Play + + TIMERA = 2000 + repeat + if + TIMERA >= 2000 + then + value = generate_random_float_in_range 0.0 2.0 + set_audio_stream_speed_with_transition sound_stream {volume} value {timeMs} 1000 + TIMERA = 0 + end + + valueCurr = get_audio_stream_speed sound_stream + print_formatted_now "DEMO 5: Set speed: %.2f, Transition: 1.0s~n~Current speed: %.2f~n~Press 5 to stop" {time} 0 {args} value valueCurr + wait 0 + until test_cheat "5" + + END_TEST() + end + + if + test_cheat "6" + then + print_formatted_now "DEMO 6: Doppler effect" {time} 10000 + START_TEST() + + if + not load_3d_audio_stream Audio_Path {store_to} sound_stream + then + print_formatted_now "~r~Failed to load audio file!" 
{time} 10000 + END_TEST() + continue + end + set_play_3d_audio_stream_at_object sound_stream {object} speaker_object + set_audio_stream_looped sound_stream true + set_audio_stream_volume sound_stream {volume} 4.0 + set_audio_stream_state sound_stream AudioStreamAction.Play + wait 1000 + + int direction = 0 + repeat + if + not camera_is_vector_move_running + then + camera_set_vector_track {from} 230.0 2525.0 16.5 {to} 230.0 2525.0 16.5 {time} 1000 {ease} true + + if + direction == 0 + then + camera_set_vector_move {from} 260.0 2522.0 18.0 {to} 200.0 2522.0 18.0 {time} 1000 {ease} true + else + camera_set_vector_move {from} 200.0 2522.0 18.0 {to} 260.0 2522.0 18.0 {time} 1000 {ease} true + end + direction = 1 - direction + end + + wait 0 + print_formatted_now "DEMO 6: Doppler effect~n~Press 6 to stop" {time} 0 + until test_cheat "6" + + END_TEST() + end + + if + test_cheat "7" + then + print_formatted_now "DEMO 7: Game speed changes" {time} 10000 + START_TEST() + + if + not load_3d_audio_stream Audio_Path {store_to} sound_stream + then + print_formatted_now "~r~Failed to load audio file!" 
{time} 10000 + END_TEST() + continue + end + set_play_3d_audio_stream_at_object sound_stream {object} speaker_object + set_audio_stream_looped sound_stream true + set_audio_stream_state sound_stream AudioStreamAction.Play + + // walk + set_player_control 0 {control} false + flush_route + extend_route {xyz} 229.0 2525.0 15.5 + extend_route {xyz} 230.0 2524.0 15.5 + extend_route {xyz} 231.0 2525.0 15.5 + extend_route {xyz} 230.0 2526.0 15.5 + task_follow_point_route $scplayer {walk_speed} 6 {flag} 3 + task_look_at_object $scplayer {object} speaker_object {time} -1 + + TIMERA = 2000 + repeat + if + TIMERA >= 2000 + then + value = generate_random_float_in_range 0.2 3.0 + set_time_scale value + TIMERA = 0 + end + + print_formatted_now "DEMO 7: Game speed: %.2f~n~Press 7 to stop" {time} 0 {args} value + wait 0 + until test_cheat "7" + + set_time_scale 1.0 + clear_char_tasks_immediately $scplayer + set_player_control 0 {control} true + END_TEST() + end +end + +terminate_this_custom_script + + +:START_TEST + remove_all_char_weapons $scplayer + set_max_wanted_level 0 + + display_radar false + display_hud false + + set_char_coordinates $scplayer {xyz} 230.0 2527.0 15.5 + set_char_heading $scplayer {heading} 180.0 + set_area_visible 0 + set_char_area_visible $scplayer {interiorId} 0 + task_scratch_head $scplayer + + set_fixed_camera_position {xyz} 230.0 2522.0 18.0 {ypr} 0.0 0.0 0.0 + point_camera_at_char $scplayer {mode} CameraMode.Fixed {switchStyle} SwitchType.JumpCut + wait 1000 + + // create speaker object (visual only) + request_model {modelId} Model_Speaker + load_all_models_now + speaker_object = create_object {modelId} Model_Speaker {xyz} 230.25 2525.0 16.0 + mark_model_as_no_longer_needed {modelId} Model_Speaker + set_object_collision speaker_object {state} false + point_camera_at_point {xyz} 230.0 2525.0 16.5 {switchStyle} SwitchType.Interpolation +return + + +:END_TEST + remove_audio_stream sound_stream + delete_object speaker_object + camera_reset_new_scriptables 
+ restore_camera + display_hud true + display_radar true +return From 0ac620fc42778f8ce80dead6e6dd322dfbed0ab8 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Tue, 30 Jul 2024 22:47:01 +0200 Subject: [PATCH 180/216] Added get_text_length opcode (#172) --- CHANGELOG.md | 1 + cleo_plugins/Text/Text.cpp | 12 ++++++++++++ tests/cleo_tests/Text/2608.txt | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+) create mode 100644 tests/cleo_tests/Text/2608.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index 18a0587f..258761a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -67,6 +67,7 @@ - new opcode **2605 ([display_text_formatted](https://library.sannybuilder.com/#/sa/text/2605))** - new opcode **2606 ([load_fxt](https://library.sannybuilder.com/#/sa/text/2606))** - new opcode **2607 ([unload_fxt](https://library.sannybuilder.com/#/sa/text/2607))** + - new opcode **2608 ([get_text_length](https://library.sannybuilder.com/#/sa/text/2608))** - new and updated opcodes - implemented support for **memory pointer string** arguments for all game's native opcodes - **0B1E ([sign_extend](https://library.sannybuilder.com/#/sa/bitwise/0B1E))** diff --git a/cleo_plugins/Text/Text.cpp b/cleo_plugins/Text/Text.cpp index deada1b5..c44dac99 100644 --- a/cleo_plugins/Text/Text.cpp +++ b/cleo_plugins/Text/Text.cpp @@ -61,6 +61,7 @@ class Text CLEO_RegisterOpcode(0x2605, opcode_2605); // display_text_formatted CLEO_RegisterOpcode(0x2606, opcode_2606); // load_fxt CLEO_RegisterOpcode(0x2607, opcode_2607); // unload_fxt + CLEO_RegisterOpcode(0x2608, opcode_2608); // get_text_length // register event callbacks CLEO_RegisterCallback(eCallbackId::GameBegin, OnGameBegin); @@ -484,6 +485,17 @@ class Text OPCODE_CONDITION_RESULT(removed != 0); return OR_CONTINUE; } + + //2608=3,get_text_length %1d% store_to %2d% + static OpcodeResult __stdcall opcode_2608(CLEO::CRunningScript* thread) + { + OPCODE_READ_PARAM_STRING(str); + + auto result = strlen(str); + + 
OPCODE_WRITE_PARAM_INT(result); + return OR_CONTINUE; + } } textInstance; CTextManager Text::textManager; diff --git a/tests/cleo_tests/Text/2608.txt b/tests/cleo_tests/Text/2608.txt new file mode 100644 index 00000000..6985cb92 --- /dev/null +++ b/tests/cleo_tests/Text/2608.txt @@ -0,0 +1,33 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name '2608' +test("2608 (get_text_length)", tests) +terminate_this_custom_script + +function tests + it("should return text length", test1) + return + + function test1 + int length + + shortstring short = 'One' + 2608: get_text_length {text} short {length} length + assert_eq(length, 3) + + string_format short = '' + 2608: get_text_length {text} short {length} length + assert_eq(length, 0) + + longstring long = "Longer string" + 2608: get_text_length {text} long {length} length + assert_eq(length, 13) + + int buff = allocate_memory {size} 255 + string_format buff = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc et tellus vehicula, tempus felis malesuada, commodo nunc. Interdum et malesuada fames ac ante ipsum primis in faucibus. Etiam condimentum porttitor lorem est." + 2608: get_text_length {text} buff {length} length + free_memory {address} buff + assert_eq(length, 220) + end +end From 57306e11172b3fcf5d70720eb5d3a2809fff8161 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Thu, 1 Aug 2024 22:08:10 +0200 Subject: [PATCH 181/216] Removed alpha to make colors more vibrant. (#175) Slightly increased font size. 
--- cleo_plugins/DebugUtils/SA.DebugUtils.ini | 2 +- cleo_plugins/DebugUtils/ScreenLog.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cleo_plugins/DebugUtils/SA.DebugUtils.ini b/cleo_plugins/DebugUtils/SA.DebugUtils.ini index df9726ca..829c7ef3 100644 --- a/cleo_plugins/DebugUtils/SA.DebugUtils.ini +++ b/cleo_plugins/DebugUtils/SA.DebugUtils.ini @@ -7,4 +7,4 @@ LegacyDebugOpcodes=0 Level=2 MessageTime=3000 MessagesMax=45 -FontSize=40 +FontSize=45 diff --git a/cleo_plugins/DebugUtils/ScreenLog.h b/cleo_plugins/DebugUtils/ScreenLog.h index fa84fbb1..a487b783 100644 --- a/cleo_plugins/DebugUtils/ScreenLog.h +++ b/cleo_plugins/DebugUtils/ScreenLog.h @@ -25,10 +25,10 @@ class ScreenLog DWORD timeFadeout; // miliseconds const CRGBA fontColor[4] = { // colors for eLogLevel - CRGBA(0xDD, 0xDD, 0xDD, 0xF0), // None - CRGBA(0xFF, 0x30, 0x30, 0xF0), // Error - CRGBA(0xFF, 0xEE, 0x30, 0xF0), // User - CRGBA(0xDD, 0xDD, 0xDD, 0xF0), // Default + CRGBA(0xDD, 0xDD, 0xDD, 0xFF), // None + CRGBA(0xFF, 0x30, 0x30, 0xFF), // Error + CRGBA(0xFF, 0xEE, 0x30, 0xFF), // User + CRGBA(0xDD, 0xDD, 0xDD, 0xFF), // Default }; struct Entry From 6aeb73751a8b018318cc8408e1aaa0a470cbcd92 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Fri, 2 Aug 2024 04:39:07 +0200 Subject: [PATCH 182/216] =?UTF-8?q?Standardized=20warning=20messages=20of?= =?UTF-8?q?=20terminate=5Fthis=5Fscript=20and=20terminate=5F=E2=80=A6=20(#?= =?UTF-8?q?174)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- source/CCustomOpcodeSystem.cpp | 8 ++++---- source/CScriptEngine.cpp | 25 ++++++++----------------- source/CScriptEngine.h | 4 +++- 3 files changed, 15 insertions(+), 22 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index b6d899db..3c3566b7 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -863,11 +863,11 @@ namespace CLEO CCustomScript *cs = reinterpret_cast(thread); if 
(thread->IsMission() || !cs->IsCustom()) { - LOG_WARNING(0, "Incorrect usage of opcode [0A93] in script %s", ((CCustomScript*)thread)->GetInfoStr().c_str()); - - return OR_CONTINUE; + LOG_WARNING(0, "Incorrect usage of opcode [0A93] in script %s. Use [004E] instead.", ((CCustomScript*)thread)->GetInfoStr().c_str()); + return OR_CONTINUE; // legacy behavior } - GetInstance().ScriptEngine.RemoveCustomScript(cs); + + GetInstance().ScriptEngine.RemoveScript(thread); return OR_INTERRUPT; } diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 53e8d602..cae3f7b8 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -313,16 +313,6 @@ namespace CLEO extern "C" void __stdcall opcode_004E(CCustomScript *pScript) // terminate_this_script { - if (pScript->IsCustom()) - { - if (pScript->IsMission()) - *MissionLoaded = false; - else - { - TRACE("Incorrect usage of opcode [004E] in script %s.", pScript->GetName().c_str()); - } - } - GetInstance().ScriptEngine.RemoveScript(pScript); } @@ -1369,17 +1359,18 @@ namespace CLEO void CScriptEngine::RemoveScript(CRunningScript* thread) { - if (!thread->IsCustom()) + if (thread->IsMission()) *MissionLoaded = false; + + if (thread->IsCustom()) + { + RemoveCustomScript((CCustomScript*)thread); + } + else // native script { - if (thread->IsMission()) *MissionLoaded = false; RemoveScriptFromQueue(thread, activeThreadQueue); AddScriptToQueue(thread, inactiveThreadQueue); StopScript(thread); } - else - { - RemoveCustomScript((CCustomScript*)thread); - } } void CScriptEngine::RemoveCustomScript(CCustomScript *cs) @@ -1397,7 +1388,7 @@ namespace CLEO } for (auto childThread : cs->childThreads) { - CScriptEngine::RemoveCustomScript(childThread); + CScriptEngine::RemoveScript(childThread); } if (cs == CustomMission) { diff --git a/source/CScriptEngine.h b/source/CScriptEngine.h index 636bf925..6197ef4e 100644 --- a/source/CScriptEngine.h +++ b/source/CScriptEngine.h @@ -142,7 +142,6 @@ namespace CLEO bool 
IsValidScriptPtr(const CRunningScript*) const; // leads to any script? (regular or custom) void AddCustomScript(CCustomScript*); void RemoveScript(CRunningScript*); // native or custom - void RemoveCustomScript(CCustomScript*); void RemoveAllCustomScripts(); void UnregisterAllScripts(); void ReregisterAllScripts(); @@ -151,6 +150,9 @@ namespace CLEO inline CCustomScript* GetCustomMission() { return CustomMission; } inline size_t WorkingScriptsCount() { return CustomScripts.size(); } + + private: + void RemoveCustomScript(CCustomScript*); }; extern void(__thiscall * AddScriptToQueue)(CRunningScript *, CRunningScript **queue); From 62fe46d995af2d31cc27077d54b625b17ed6959c Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sat, 3 Aug 2024 02:40:29 +0200 Subject: [PATCH 183/216] Allowed more args in cleo_return than caller expected in legacy modes. (#176) --- source/CCustomOpcodeSystem.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 3c3566b7..d3d5b51f 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -1121,7 +1121,7 @@ namespace CLEO returnParamCount = declaredParamCount; } - return GetInstance().OpcodeSystem.CleoReturnGeneric(0x0AB2, thread, true, returnParamCount); + return GetInstance().OpcodeSystem.CleoReturnGeneric(0x0AB2, thread, !IsLegacyScript(thread), returnParamCount); } //0AB3=2,set_cleo_shared_var %1d% = %2d% From 53d5198d11ecb6e998a5b59a6410e1e20330b76c Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Tue, 10 Sep 2024 17:15:22 +0200 Subject: [PATCH 184/216] Unregistering callbacks in plugins cleanup. 
(#183) --- cleo_plugins/Audio/Audio.cpp | 9 ++++ cleo_plugins/DebugUtils/DebugUtils.cpp | 8 +++ .../FileSystemOperations.cpp | 5 ++ .../MemoryOperations/MemoryOperations.cpp | 5 ++ cleo_plugins/Text/Text.cpp | 12 ++++- cleo_sdk/CLEO.h | 2 + cleo_sdk/CLEO_Utils.h | 33 +++++++++++- source/CleoBase.cpp | 10 ++++ source/CleoBase.h | 1 + source/cleo.def | 53 ++++++++++--------- 10 files changed, 110 insertions(+), 28 deletions(-) diff --git a/cleo_plugins/Audio/Audio.cpp b/cleo_plugins/Audio/Audio.cpp index 0da56351..d7c62d3c 100644 --- a/cleo_plugins/Audio/Audio.cpp +++ b/cleo_plugins/Audio/Audio.cpp @@ -72,6 +72,15 @@ class Audio CLEO_RegisterCallback(eCallbackId::MainWindowFocus, OnMainWindowFocus); } + ~Audio() + { + CLEO_UnregisterCallback(eCallbackId::GameBegin, OnGameBegin); + CLEO_UnregisterCallback(eCallbackId::GameProcess, OnGameProcess); + CLEO_UnregisterCallback(eCallbackId::GameEnd, OnGameEnd); + CLEO_UnregisterCallback(eCallbackId::DrawingFinished, OnDrawingFinished); + CLEO_UnregisterCallback(eCallbackId::MainWindowFocus, OnMainWindowFocus); + } + static void __stdcall OnGameBegin(DWORD saveSlot) { soundSystem.Init(); diff --git a/cleo_plugins/DebugUtils/DebugUtils.cpp b/cleo_plugins/DebugUtils/DebugUtils.cpp index fe72ebe6..5e49836f 100644 --- a/cleo_plugins/DebugUtils/DebugUtils.cpp +++ b/cleo_plugins/DebugUtils/DebugUtils.cpp @@ -64,6 +64,14 @@ class DebugUtils CLEO_RegisterCallback(eCallbackId::ScriptsFinalize, OnScriptsFinalize); } + ~DebugUtils() + { + CLEO_UnregisterCallback(eCallbackId::Log, OnLog); + CLEO_UnregisterCallback(eCallbackId::DrawingFinished, OnDrawingFinished); + CLEO_UnregisterCallback(eCallbackId::ScriptProcess, OnScriptProcess); + CLEO_UnregisterCallback(eCallbackId::ScriptsFinalize, OnScriptsFinalize); + } + // ---------------------------------------------- event callbacks ------------------------------------------------- static void WINAPI OnScriptsFinalize() diff --git a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp 
b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp index fbdec64f..04acfa8d 100644 --- a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp @@ -80,6 +80,11 @@ class FileSystemOperations CLEO_RegisterCallback(eCallbackId::ScriptsFinalize, OnFinalizeScriptObjects); } + ~FileSystemOperations() + { + CLEO_UnregisterCallback(eCallbackId::ScriptsFinalize, OnFinalizeScriptObjects); + } + //0A99=1,set_current_directory %1b:userdir/rootdir% static OpcodeResult __stdcall opcode_0A99(CRunningScript* thread) { diff --git a/cleo_plugins/MemoryOperations/MemoryOperations.cpp b/cleo_plugins/MemoryOperations/MemoryOperations.cpp index 8c8937b1..d2ecb49d 100644 --- a/cleo_plugins/MemoryOperations/MemoryOperations.cpp +++ b/cleo_plugins/MemoryOperations/MemoryOperations.cpp @@ -71,6 +71,11 @@ class MemoryOperations CLEO_RegisterCallback(eCallbackId::ScriptsFinalize, OnFinalizeScriptObjects); } + ~MemoryOperations() + { + CLEO_UnregisterCallback(eCallbackId::ScriptsFinalize, OnFinalizeScriptObjects); + } + static void __stdcall OnFinalizeScriptObjects() { TRACE("Cleaning up %d allocated memory blocks...", m_allocations.size()); diff --git a/cleo_plugins/Text/Text.cpp b/cleo_plugins/Text/Text.cpp index c44dac99..bd8086c4 100644 --- a/cleo_plugins/Text/Text.cpp +++ b/cleo_plugins/Text/Text.cpp @@ -23,6 +23,8 @@ class Text static const size_t MsgBigStyleCount = 7; static char msgBuffBig[MsgBigStyleCount][MAX_STR_LEN + 1]; + MemPatch patchCTextGet; + Text() { auto cleoVer = CLEO_GetVersion(); @@ -68,7 +70,15 @@ class Text CLEO_RegisterCallback(eCallbackId::GameEnd, OnGameEnd); // install hooks - MemPatchJump(0x006A0050, &HOOK_CTextGet); // FUNC_CText__Get from CText.cpp + patchCTextGet = MemPatchJump(0x006A0050, &HOOK_CTextGet); // FUNC_CText__Get from CText.cpp + } + + ~Text() + { + CLEO_UnregisterCallback(eCallbackId::GameBegin, OnGameBegin); + CLEO_UnregisterCallback(eCallbackId::GameEnd, OnGameEnd); + 
+ patchCTextGet.Apply(); // undo hook } static void __stdcall OnGameBegin(DWORD saveSlot) diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index 918422f8..823e92ad 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -463,7 +463,9 @@ eGameVersion WINAPI CLEO_GetGameVersion(); BOOL WINAPI CLEO_RegisterOpcode(WORD opcode, _pOpcodeHandler callback); BOOL WINAPI CLEO_RegisterCommand(const char* commandName, _pOpcodeHandler callback); // uses cleo\.CONFIG\sa.json to obtain opcode number from name + void WINAPI CLEO_RegisterCallback(eCallbackId id, void* func); +void WINAPI CLEO_UnregisterCallback(eCallbackId id, void* func); // script utils diff --git a/cleo_sdk/CLEO_Utils.h b/cleo_sdk/CLEO_Utils.h index 0aa22c0a..30d42e59 100644 --- a/cleo_sdk/CLEO_Utils.h +++ b/cleo_sdk/CLEO_Utils.h @@ -16,6 +16,7 @@ #include "CPools.h" // from GTA Plugin SDK #include "shellapi.h" // game window minimize/maximize support #include +#include #include namespace CLEO @@ -232,15 +233,45 @@ namespace CLEO BOOL needTerminator = false; }; - static void MemPatchJump(size_t position, void* jumpTarget) + class MemPatch + { + void* address = nullptr; + std::vector buffer; + + public: + MemPatch() + { + } + + MemPatch(void* src, size_t size) : address(src), buffer(size) + { + memcpy(buffer.data(), src, size); + } + + void Apply() + { + if (!buffer.empty()) + { + DWORD oldProtect; + VirtualProtect(address, buffer.size(), PAGE_EXECUTE_READWRITE, &oldProtect); + memcpy(buffer.data(), address, buffer.size()); + } + } + }; + + static MemPatch MemPatchJump(size_t position, void* jumpTarget) { DWORD oldProtect; VirtualProtect((LPVOID)position, 5, PAGE_EXECUTE_READWRITE, &oldProtect); + MemPatch original((void*)position, 5); + *(BYTE*)position = 0xE9; // asm: jmp position += sizeof(BYTE); *(DWORD*)position = (DWORD)jumpTarget - position - 4; + + return original; } static void* MemPatchCall(size_t position, void* newFunction) diff --git a/source/CleoBase.cpp b/source/CleoBase.cpp index 48f6957e..4a089436 
100644 --- a/source/CleoBase.cpp +++ b/source/CleoBase.cpp @@ -188,6 +188,11 @@ namespace CLEO m_callbacks[id].insert(func); } + void CCleoInstance::RemoveCallback(eCallbackId id, void* func) + { + m_callbacks[id].erase(func); + } + const std::set& CCleoInstance::GetCallbacks(eCallbackId id) { return m_callbacks[id]; @@ -216,6 +221,11 @@ namespace CLEO GetInstance().AddCallback(id, func); } + void WINAPI CLEO_UnregisterCallback(eCallbackId id, void* func) + { + GetInstance().RemoveCallback(id, func); + } + void __cdecl CCleoInstance::OnDrawingFinished() { GetInstance().CallCallbacks(eCallbackId::DrawingFinished); // execute registered callbacks diff --git a/source/CleoBase.h b/source/CleoBase.h index 8b5180b8..7c129e3e 100644 --- a/source/CleoBase.h +++ b/source/CleoBase.h @@ -47,6 +47,7 @@ namespace CLEO bool IsStarted() const { return m_bStarted; } void AddCallback(eCallbackId id, void* func); + void RemoveCallback(eCallbackId id, void* func); const std::set& GetCallbacks(eCallbackId id); void CallCallbacks(eCallbackId id); void CallCallbacks(eCallbackId id, DWORD arg); diff --git a/source/cleo.def b/source/cleo.def index 10fc778d..59112e0c 100644 --- a/source/cleo.def +++ b/source/cleo.def @@ -28,29 +28,30 @@ EXPORTS _CLEO_AddScriptDeleteDelegate@4 @25 _CLEO_RemoveScriptDeleteDelegate@4 @26 _CLEO_RegisterCallback@8 @27 - _CLEO_GetVarArgCount@4 @28 - _CLEO_SkipUnusedVarArgs@4 @29 - _CLEO_ReadParamsFormatted@16 @30 - _CLEO_GetScriptVersion@4 @31 - _CLEO_GetScriptInfoStr@16 @32 - _CLEO_GetScriptParamInfoStr@12 @33 - _CLEO_ResolvePath@12 @34 - _CLEO_ListDirectory@16 @35 - _CLEO_StringListFree@8 @36 - _CLEO_GetScriptDebugMode@4 @37 - _CLEO_SetScriptDebugMode@8 @38 - _CLEO_Log@8 @39 - _CLEO_ReadStringParamWriteBuffer@16 @40 - _CLEO_GetOpcodeParamsArray@0 @41 - _CLEO_GetParamsHandledCount@0 @42 - _CLEO_PeekIntOpcodeParam@4 @43 - _CLEO_PeekFloatOpcodeParam@4 @44 - _CLEO_PeekPointerToScriptVariable@4 @45 - _CLEO_GetScriptByName@16 @46 - _CLEO_GetScriptByFilename@8 @47 - 
_CLEO_GetScriptFilename@4 @48 - _CLEO_GetScriptWorkDir@4 @49 - _CLEO_SetScriptWorkDir@8 @50 - _CLEO_RegisterCommand@8 @51 - _CLEO_IsScriptRunning@4 @52 - _CLEO_TerminateScript@4 @53 + _CLEO_UnregisterCallback@8 @28 + _CLEO_GetVarArgCount@4 @29 + _CLEO_SkipUnusedVarArgs@4 @30 + _CLEO_ReadParamsFormatted@16 @31 + _CLEO_GetScriptVersion@4 @32 + _CLEO_GetScriptInfoStr@16 @33 + _CLEO_GetScriptParamInfoStr@12 @34 + _CLEO_ResolvePath@12 @35 + _CLEO_ListDirectory@16 @36 + _CLEO_StringListFree@8 @37 + _CLEO_GetScriptDebugMode@4 @38 + _CLEO_SetScriptDebugMode@8 @39 + _CLEO_Log@8 @40 + _CLEO_ReadStringParamWriteBuffer@16 @41 + _CLEO_GetOpcodeParamsArray@0 @42 + _CLEO_GetParamsHandledCount@0 @43 + _CLEO_PeekIntOpcodeParam@4 @44 + _CLEO_PeekFloatOpcodeParam@4 @45 + _CLEO_PeekPointerToScriptVariable@4 @46 + _CLEO_GetScriptByName@16 @47 + _CLEO_GetScriptByFilename@8 @48 + _CLEO_GetScriptFilename@4 @49 + _CLEO_GetScriptWorkDir@4 @50 + _CLEO_SetScriptWorkDir@8 @51 + _CLEO_RegisterCommand@8 @52 + _CLEO_IsScriptRunning@4 @53 + _CLEO_TerminateScript@4 @54 From 8e5025b15e09db949d20ac6a6a686cb9d38270b8 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Tue, 10 Sep 2024 17:15:47 +0200 Subject: [PATCH 185/216] cleo.log improvements (#182) * cleo.log improvements * Added peer script separator to log. * fixup! Added peer script separator to log. 
--- cleo_plugins/Audio/CSoundSystem.cpp | 49 ++++++++++++++++++++--------- cleo_plugins/Text/CTextManager.cpp | 5 ++- source/CCustomOpcodeSystem.cpp | 1 + source/CPluginSystem.cpp | 3 ++ source/CScriptEngine.cpp | 5 +-- source/CleoBase.cpp | 2 ++ 6 files changed, 48 insertions(+), 17 deletions(-) diff --git a/cleo_plugins/Audio/CSoundSystem.cpp b/cleo_plugins/Audio/CSoundSystem.cpp index b5d148af..08179106 100644 --- a/cleo_plugins/Audio/CSoundSystem.cpp +++ b/cleo_plugins/Audio/CSoundSystem.cpp @@ -20,14 +20,22 @@ namespace CLEO void EnumerateBassDevices(int& total, int& enabled, int& default_device) { + TRACE(""); // separator + TRACE("Listing audio devices:"); + BASS_DEVICEINFO info; - for (default_device = -1, enabled = 0, total = 0; BASS_GetDeviceInfo(total, &info); ++total) + enabled = 0; + default_device = -1; + for (total = 0; BASS_GetDeviceInfo(total, &info); ++total) { - if (info.flags & BASS_DEVICE_ENABLED) ++enabled; if (info.flags & BASS_DEVICE_DEFAULT) default_device = total; - TRACE("Found sound device %d%s: %s", total, default_device == total ? - " (default)" : "", info.name); + + bool isEnabled = info.flags & BASS_DEVICE_ENABLED; + if (isEnabled) ++enabled; + + TRACE(" %d: %s%s", total, info.name, isEnabled ? 
"" : " (disabled)"); } + TRACE(" Default device index: %d", default_device); } bool isNetworkSource(const char* path) @@ -58,19 +66,32 @@ namespace CLEO LegacyModeDefaultStreamType = (eStreamType)GetPrivateProfileInt("General", "LegacyModeDefaultStreamType", 0, config.c_str()); allowNetworkSources = GetPrivateProfileInt("General", "AllowNetworkSources", 1, config.c_str()) != 0; - int default_device, total_devices, enabled_devices; - EnumerateBassDevices(total_devices, enabled_devices, default_device); + int deviceIndex, total_devices, enabled_devices; + EnumerateBassDevices(total_devices, enabled_devices, deviceIndex); - int forceDevice = GetPrivateProfileInt("General", "AudioDevice", -1, config.c_str()); - BASS_DEVICEINFO info = { nullptr, nullptr, 0 }; - if (forceDevice != -1 && BASS_GetDeviceInfo(forceDevice, &info) && (info.flags & BASS_DEVICE_ENABLED)) - default_device = forceDevice; + BASS_DEVICEINFO info = { "Unknown device", nullptr, 0 }; + BASS_GetDeviceInfo(deviceIndex, &info); - TRACE("On system found %d devices, %d enabled devices, assuming device to use: %d (%s)", - total_devices, enabled_devices, default_device, BASS_GetDeviceInfo(default_device, &info) ? - info.name : "Unknown device"); + int forceIndex = GetPrivateProfileInt("General", "AudioDevice", -1, config.c_str()); + if (forceIndex != -1) + { + BASS_DEVICEINFO forceInfo = { "Unknown device", nullptr, 0 }; + if (BASS_GetDeviceInfo(forceIndex, &forceInfo) && forceInfo.flags & BASS_DEVICE_ENABLED) + { + TRACE("Force selecting audio device #%d: %s", forceIndex, forceInfo.name); + deviceIndex = forceIndex; + } + else + { + LOG_WARNING(0, "Failed to force select device #%d! 
Selecting default audio device #%d: %s", forceIndex, deviceIndex, info.name); + } + } + else + { + TRACE("Selecting default audio device #%d: %s", deviceIndex, info.name); + } - if (BASS_Init(default_device, 44100, BASS_DEVICE_3D, RsGlobal.ps->window, nullptr) && + if (BASS_Init(deviceIndex, 44100, BASS_DEVICE_3D, RsGlobal.ps->window, nullptr) && BASS_Set3DFactors(1.0f, 3.0f, 80.0f) && BASS_Set3DPosition(&pos, &vel, &front, &top)) { diff --git a/cleo_plugins/Text/CTextManager.cpp b/cleo_plugins/Text/CTextManager.cpp index 57211694..91b524d6 100644 --- a/cleo_plugins/Text/CTextManager.cpp +++ b/cleo_plugins/Text/CTextManager.cpp @@ -92,6 +92,9 @@ namespace CLEO void CTextManager::LoadFxts() { + TRACE(""); // separator + TRACE("Loading CLEO text files..."); + // create FXT directory if not present yet FS::create_directory(FS::path(Gta_Root_Dir_Path).append("cleo\\cleo_text")); @@ -103,7 +106,7 @@ namespace CLEO { std::ifstream stream(list.strings[i]); auto result = ParseFxtFile(stream); - TRACE("Added %d new FXT entries from file %s", result, list.strings[i]); + TRACE("Added %d new FXT entries from file '%s'", result, list.strings[i]); } catch (std::exception& ex) { diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index d3d5b51f..830f32d3 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -239,6 +239,7 @@ namespace CLEO { if (initialized) return; + TRACE(""); // separator TRACE("Initializing CLEO core opcodes..."); CLEO_RegisterOpcode(0x0051, opcode_0051); diff --git a/source/CPluginSystem.cpp b/source/CPluginSystem.cpp index 97b1af09..3c8b2e5f 100644 --- a/source/CPluginSystem.cpp +++ b/source/CPluginSystem.cpp @@ -57,6 +57,7 @@ void CPluginSystem::LoadPlugins() CLEO_StringListFree(files); }; + TRACE(""); // separator TRACE("Listing CLEO plugins:"); ScanPluginsDir(FS::path(Filepath_Cleo).append("cleo_plugins").string(), "SA.", ".cleo"); 
ScanPluginsDir(FS::path(Filepath_Cleo).append("cleo_plugins").string(), "", ".cleo"); // legacy plugins in new location @@ -68,6 +69,7 @@ void CPluginSystem::LoadPlugins() for (auto it = paths.crbegin(); it != paths.crend(); it++) { const auto filename = it->c_str(); + TRACE(""); // separator TRACE("Loading plugin '%s'", filename); HMODULE hlib = LoadLibrary(filename); @@ -79,6 +81,7 @@ void CPluginSystem::LoadPlugins() plugins.push_back(hlib); } + TRACE(""); // separator } else { diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index cae3f7b8..86d73238 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -1023,7 +1023,7 @@ namespace CLEO if (!found.empty()) { - TRACE("Starting CLEO scripts"); + TRACE("Starting CLEO scripts..."); for (const auto& path : found) { @@ -1476,7 +1476,8 @@ namespace CLEO LastSearchPed(0), LastSearchCar(0), LastSearchObj(0), CompatVer(CLEO_VER_CUR) { - TRACE("Loading custom script %s...", szFileName); + TRACE(""); // separator + TRACE("Loading custom script '%s'...", szFileName); bIsCustom = true; bIsMission = bUseMissionCleanup = bIsMiss; diff --git a/source/CleoBase.cpp b/source/CleoBase.cpp index 4a089436..8b598c0d 100644 --- a/source/CleoBase.cpp +++ b/source/CleoBase.cpp @@ -164,6 +164,7 @@ namespace CLEO saveSlot = MenuManager->m_bWantToLoad ? MenuManager->m_nSelectedSaveGame : -1; + TRACE(""); // separator TRACE("Starting new game, save slot: %d", saveSlot); // execute registered callbacks @@ -175,6 +176,7 @@ namespace CLEO if (!m_bGameInProgress) return; m_bGameInProgress = false; + TRACE(""); // separator TRACE("Ending current game"); GetInstance().CallCallbacks(eCallbackId::GameEnd); // execute registered callbacks ScriptEngine.GameEnd(); From a02d8d0d2e012d253251bf869ff90d4949663c60 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Tue, 10 Sep 2024 21:56:30 +0200 Subject: [PATCH 186/216] Patch in CCustomScript::ResolvePath for ModLoader support. 
(#184) * Patch in CCustomScript::ResolvePath for ModLoater support. * Restored checking for safe file paths * IsFilepathSafe updated --- cleo_sdk/CLEO_Utils.h | 26 +++++++++++++++++++++++++- source/CScriptEngine.cpp | 7 ++++++- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/cleo_sdk/CLEO_Utils.h b/cleo_sdk/CLEO_Utils.h index 30d42e59..b0ae2bd2 100644 --- a/cleo_sdk/CLEO_Utils.h +++ b/cleo_sdk/CLEO_Utils.h @@ -15,6 +15,7 @@ #include "CLEO.h" #include "CPools.h" // from GTA Plugin SDK #include "shellapi.h" // game window minimize/maximize support +#include #include #include #include @@ -118,6 +119,29 @@ namespace CLEO return std::move(info); } + // does file path points inside game directories? (game root or user files) + static bool IsFilepathSafe(CLEO::CRunningScript* thread, const char* path) + { + auto IsSubpath = [](std::filesystem::path path, std::filesystem::path base) + { + auto relative = std::filesystem::relative(base, path); + return relative.empty() || relative.native()[0] != '.'; + }; + + auto fsPath = std::filesystem::path(path); + if (!fsPath.is_absolute()) + { + fsPath = CLEO_GetScriptWorkDir(thread) / fsPath; + } + + if (IsSubpath(fsPath, Gta_Root_Dir_Path) || IsSubpath(fsPath, Gta_User_Dir_Path)) + { + return true; + } + + return false; + } + static bool IsObjectHandleValid(DWORD handle) { // get handle info @@ -580,7 +604,7 @@ namespace CLEO #define OPCODE_READ_PARAMS_FORMATTED(_format, _varName) char _varName[2 * MAX_STR_LEN + 1]; char* _varName##Ok = CLEO_ReadParamsFormatted(thread, _format, _varName, sizeof(_varName)); #define OPCODE_READ_PARAM_FILEPATH(_varName) char _buff_##_varName[512]; const char* ##_varName = _readParamText(thread, _buff_##_varName, 512); if(##_varName != nullptr) ##_varName = _buff_##_varName; if(_paramWasString()) CLEO_ResolvePath(thread, _buff_##_varName, 512); else return OpcodeResult::OR_INTERRUPT; \ - if(_strnicmp(##_varName, Gta_Root_Dir_Path, strlen(Gta_Root_Dir_Path)) != 0 && _strnicmp(##_varName, 
Gta_User_Dir_Path, strlen(Gta_User_Dir_Path)) != 0) { SHOW_ERROR("Forbidden file path '%s' outside game directories in script %s \nScript suspended.", ##_varName, ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if(!IsFilepathSafe(thread, ##_varName)) { SHOW_ERROR("Forbidden file path '%s' outside game directories in script %s \nScript suspended.", ##_varName, ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define OPCODE_READ_PARAM_PTR() _readParam(thread).pParam; \ if (!_paramWasInt()) { SHOW_ERROR("Input argument %s expected to be integer, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 86d73238..17882522 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -708,7 +708,12 @@ namespace CLEO if(customWorkDir != nullptr) fsPath = ResolvePath(customWorkDir) / fsPath; else - fsPath = GetWorkDir() / fsPath; + { + if (Filepath_Root.compare(GetWorkDir()) != 0) // ModLoader support: do not expand game dir relative paths + { + fsPath = GetWorkDir() / fsPath; + } + } } return FS::weakly_canonical(fsPath).string(); From b099c1c9ba2f6680a686e2f0cefde1e33dda5100 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Wed, 11 Sep 2024 17:43:41 +0200 Subject: [PATCH 187/216] Unloading CLEO plugins. (#185) Unloading CLEO plugins. 
Log improvements --- cleo_plugins/Audio/CSoundSystem.cpp | 1 + source/CCustomOpcodeSystem.h | 6 ++++-- source/CDebug.h | 1 + source/CPluginSystem.cpp | 21 +++++++++++++++++++-- source/CPluginSystem.h | 13 +++++++++++-- source/CScriptEngine.cpp | 1 + source/CleoBase.cpp | 2 ++ 7 files changed, 39 insertions(+), 6 deletions(-) diff --git a/cleo_plugins/Audio/CSoundSystem.cpp b/cleo_plugins/Audio/CSoundSystem.cpp index 08179106..719ed529 100644 --- a/cleo_plugins/Audio/CSoundSystem.cpp +++ b/cleo_plugins/Audio/CSoundSystem.cpp @@ -46,6 +46,7 @@ namespace CLEO CSoundSystem::~CSoundSystem() { + TRACE(""); // seaprator TRACE("Finalizing SoundSystem..."); Clear(); diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index 9059bdc2..fd450c7c 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -38,8 +38,10 @@ namespace CLEO void Init(); ~CCustomOpcodeSystem() { - TRACE("Last opcode executed: %04X", lastOpcode); - TRACE("Previous opcode executed: %04X", prevOpcode); + TRACE(""); // separator + TRACE("Custom Opcode System finalized:"); + TRACE(" Last opcode executed: %04X", lastOpcode); + TRACE(" Previous opcode executed: %04X", prevOpcode); } static bool RegisterOpcode(WORD opcode, CustomOpcodeHandler callback); diff --git a/source/CDebug.h b/source/CDebug.h index c1da6c0e..c6223d54 100644 --- a/source/CDebug.h +++ b/source/CDebug.h @@ -24,6 +24,7 @@ class CDebug ~CDebug() { + CLEO::Trace(CLEO::eLogLevel::Default, ""); // separator CLEO::Trace(CLEO::eLogLevel::Default, "Log finished."); } diff --git a/source/CPluginSystem.cpp b/source/CPluginSystem.cpp index 3c8b2e5f..1a6a6dcc 100644 --- a/source/CPluginSystem.cpp +++ b/source/CPluginSystem.cpp @@ -7,7 +7,7 @@ using namespace CLEO; CPluginSystem::~CPluginSystem() { - std::for_each(plugins.begin(), plugins.end(), FreeLibrary); + UnloadPlugins(); } void CPluginSystem::LoadPlugins() @@ -79,7 +79,7 @@ void CPluginSystem::LoadPlugins() continue; } - plugins.push_back(hlib); + 
plugins.emplace_back(filename, hlib); } TRACE(""); // separator } @@ -91,6 +91,23 @@ void CPluginSystem::LoadPlugins() pluginsLoaded = true; } +void CPluginSystem::UnloadPlugins() +{ + if (!pluginsLoaded) return; + + TRACE(""); // separator + TRACE("Unloading CLEO plugins:"); + for (const auto& plugin : plugins) + { + TRACE(" - Unloading '%s' at 0x%08X", plugin.name.c_str(), plugin.handle); + FreeLibrary(plugin.handle); + } + TRACE("CLEO plugins unloaded"); + + plugins.clear(); + pluginsLoaded = false; +} + size_t CPluginSystem::GetNumPlugins() const { return plugins.size(); diff --git a/source/CPluginSystem.h b/source/CPluginSystem.h index a1b67287..7b99d4a5 100644 --- a/source/CPluginSystem.h +++ b/source/CPluginSystem.h @@ -2,8 +2,8 @@ #include "FileEnumerator.h" #include "CDebug.h" #include -#include #include +#include #include @@ -11,7 +11,15 @@ namespace CLEO { class CPluginSystem { - std::list plugins; + struct PluginEntry + { + std::string name; + HMODULE handle = nullptr; + + PluginEntry() = default; + PluginEntry(std::string name, HMODULE handle) : name(name), handle(handle) {} + }; + std::list plugins; bool pluginsLoaded = false; public: @@ -20,6 +28,7 @@ namespace CLEO ~CPluginSystem(); void LoadPlugins(); + void UnloadPlugins(); size_t GetNumPlugins() const; }; } diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 17882522..fe15d471 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -994,6 +994,7 @@ namespace CLEO void CScriptEngine::LoadCustomScripts() { + TRACE(""); // separator TRACE("Listing CLEO scripts:"); std::set found; diff --git a/source/CleoBase.cpp b/source/CleoBase.cpp index 8b598c0d..96bf2752 100644 --- a/source/CleoBase.cpp +++ b/source/CleoBase.cpp @@ -155,6 +155,8 @@ namespace CLEO m_bStarted = false; ScriptEngine.GameEnd(); + + PluginSystem.UnloadPlugins(); } void CCleoInstance::GameBegin() From d68bec4abb4a8dc935aea79c404550acdf8dec25 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Fri, 13 Sep 
2024 16:56:23 +0200 Subject: [PATCH 188/216] Fixed IsFilepathSafe util function. (#190) --- cleo_sdk/CLEO_Utils.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cleo_sdk/CLEO_Utils.h b/cleo_sdk/CLEO_Utils.h index b0ae2bd2..35d0ca0f 100644 --- a/cleo_sdk/CLEO_Utils.h +++ b/cleo_sdk/CLEO_Utils.h @@ -122,10 +122,10 @@ namespace CLEO // does file path points inside game directories? (game root or user files) static bool IsFilepathSafe(CLEO::CRunningScript* thread, const char* path) { - auto IsSubpath = [](std::filesystem::path path, std::filesystem::path base) + auto IsSubpath = [](std::filesystem::path path, std::filesystem::path base) { - auto relative = std::filesystem::relative(base, path); - return relative.empty() || relative.native()[0] != '.'; + auto relative = std::filesystem::relative(path, base); + return !relative.empty() && *relative.begin() != ".."; }; auto fsPath = std::filesystem::path(path); From 6937fcf206c6c0c011a0d2a717b942a618693572 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Fri, 13 Sep 2024 16:56:41 +0200 Subject: [PATCH 189/216] Fixed set_audio_stream_source_size opcode not working (#191) --- cleo_plugins/Audio/Audio.cpp | 2 +- cleo_plugins/Audio/CAudioStream.cpp | 2 +- cleo_plugins/Audio/CAudioStream.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cleo_plugins/Audio/Audio.cpp b/cleo_plugins/Audio/Audio.cpp index d7c62d3c..192c66d6 100644 --- a/cleo_plugins/Audio/Audio.cpp +++ b/cleo_plugins/Audio/Audio.cpp @@ -386,7 +386,7 @@ class Audio auto stream = (CAudioStream*)OPCODE_READ_PARAM_UINT(); VALIDATE_STREAM(); auto radius = OPCODE_READ_PARAM_FLOAT(); - if (stream) stream->Set3dSize(radius); + if (stream) stream->Set3dSourceSize(radius); return OR_CONTINUE; } diff --git a/cleo_plugins/Audio/CAudioStream.cpp b/cleo_plugins/Audio/CAudioStream.cpp index 9be1eea5..03dae03b 100644 --- a/cleo_plugins/Audio/CAudioStream.cpp +++ b/cleo_plugins/Audio/CAudioStream.cpp @@ -239,7 +239,7 @@ void 
CAudioStream::Set3dPosition(const CVector& pos) // not applicable for 2d audio } -void CAudioStream::Set3dSize(float radius) +void CAudioStream::Set3dSourceSize(float radius) { // not applicable for 2d audio } diff --git a/cleo_plugins/Audio/CAudioStream.h b/cleo_plugins/Audio/CAudioStream.h index b68a3a95..fdfa2b60 100644 --- a/cleo_plugins/Audio/CAudioStream.h +++ b/cleo_plugins/Audio/CAudioStream.h @@ -47,7 +47,7 @@ namespace CLEO // 3d virtual void Set3dPosition(const CVector& pos); - virtual void Set3dSize(float radius); + virtual void Set3dSourceSize(float radius); virtual void Link(CPlaceable* placable = nullptr); virtual void Process(); From 9670d556b8a59f73432348746588ad8bcbb041c1 Mon Sep 17 00:00:00 2001 From: Seemann Date: Fri, 13 Sep 2024 10:56:56 -0400 Subject: [PATCH 190/216] allow updating existing release on publish (#192) --- .github/workflows/main.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index fbc11198..776413e8 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -165,3 +165,4 @@ jobs: tag: ${{ github.ref_name }} prerelease: ${{ contains(github.ref_name, 'beta') || contains(github.ref_name, 'alpha') }} artifacts: "SA.CLEO_*.zip" + allowUpdates: true From 3f917f166814a36aec9517327f0cd841a3cc3648 Mon Sep 17 00:00:00 2001 From: Seemann Date: Sun, 15 Sep 2024 08:20:51 -0400 Subject: [PATCH 191/216] cleo path is relative (#193) --- source/CScriptEngine.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index fe15d471..5fd105ff 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -708,12 +708,15 @@ namespace CLEO if(customWorkDir != nullptr) fsPath = ResolvePath(customWorkDir) / fsPath; else - { - if (Filepath_Root.compare(GetWorkDir()) != 0) // ModLoader support: do not expand game dir relative paths - { - fsPath = GetWorkDir() / fsPath; - } - } + fsPath = 
GetWorkDir() / fsPath; + + auto resolved = FS::weakly_canonical(fsPath).string(); + + // ModLoader support: do not expand game dir relative paths + if (resolved.find(Filepath_Root) == 0) + return FS::relative(resolved, Filepath_Root).string(); + else + return resolved; } return FS::weakly_canonical(fsPath).string(); From 683de0f8f296a79da15587219c7a2918162c0b35 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Wed, 18 Sep 2024 23:13:05 +0200 Subject: [PATCH 192/216] Path resolving performance improvements. (#197) * Path resolving performance improvements. * Modloader hack fixes. * Added Filepath_Game and Filepath_User to CLEO utils. * Initialize globals once. * ModLoader fix for CLEO plugins. * fixup! ModLoader fix for CLEO plugins. * fixup! ModLoader fix for CLEO plugins. * fixup! ModLoader fix for CLEO plugins. * Added CLEO_GetGameDirectory and CLEO_GetUserDirectory exports. * fixup! Added CLEO_GetGameDirectory and CLEO_GetUserDirectory exports. --- CHANGELOG.md | 2 + cleo_plugins/DebugUtils/DebugUtils.cpp | 10 ++- cleo_plugins/Text/CTextManager.cpp | 2 +- cleo_sdk/CLEO.h | 2 + cleo_sdk/CLEO_Utils.h | 111 ++++++++++++++++++------ source/CCustomOpcodeSystem.cpp | 2 - source/CCustomOpcodeSystem.h | 1 - source/CGameVersionManager.cpp | 1 - source/CGameVersionManager.h | 1 - source/CPluginSystem.cpp | 12 ++- source/CScriptEngine.cpp | 114 +++++++++++-------------- source/CleoBase.cpp | 30 ++++--- source/cleo.def | 2 + source/stdafx.h | 59 ++++++++----- 14 files changed, 213 insertions(+), 136 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 258761a6..9fc877dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -131,6 +131,8 @@ - CLEO_ResolvePath - CLEO_ListDirectory - CLEO_ListDirectoryFree + - CLEO_GetGameDirectory + - CLEO_GetUserDirectory - CLEO_GetScriptByName - CLEO_GetScriptByFilename - CLEO_GetScriptDebugMode diff --git a/cleo_plugins/DebugUtils/DebugUtils.cpp b/cleo_plugins/DebugUtils/DebugUtils.cpp index 5e49836f..13ef5f08 100644 --- 
a/cleo_plugins/DebugUtils/DebugUtils.cpp +++ b/cleo_plugins/DebugUtils/DebugUtils.cpp @@ -1,13 +1,15 @@ -#include "ScreenLog.h" -#include "CLEO.h" -#include "CLEO_Utils.h" -#include "CTimer.h" #include // keyboard #include #include #include #include +#include "CTimer.h" + +#include "CLEO.h" +#include "CLEO_Utils.h" +#include "ScreenLog.h" + using namespace CLEO; class DebugUtils diff --git a/cleo_plugins/Text/CTextManager.cpp b/cleo_plugins/Text/CTextManager.cpp index 91b524d6..5a6587b7 100644 --- a/cleo_plugins/Text/CTextManager.cpp +++ b/cleo_plugins/Text/CTextManager.cpp @@ -96,7 +96,7 @@ namespace CLEO TRACE("Loading CLEO text files..."); // create FXT directory if not present yet - FS::create_directory(FS::path(Gta_Root_Dir_Path).append("cleo\\cleo_text")); + FS::create_directory(std::string(CLEO_GetGameDirectory()) + "\\cleo\\cleo_text"); // load whole FXT files directory auto list = CLEO::CLEO_ListDirectory(nullptr, "cleo\\cleo_text\\*.fxt", false, true); diff --git a/cleo_sdk/CLEO.h b/cleo_sdk/CLEO.h index 823e92ad..1a1d3c83 100644 --- a/cleo_sdk/CLEO.h +++ b/cleo_sdk/CLEO.h @@ -538,6 +538,8 @@ void WINAPI CLEO_StringListFree(StringList list); // releases resources used by // Should be always used when working with files. Provides ModLoader compatibility void WINAPI CLEO_ResolvePath(CRunningScript* thread, char* inOutPath, DWORD pathMaxLen); // convert to absolute (file system) path StringList WINAPI CLEO_ListDirectory(CRunningScript* thread, const char* searchPath, BOOL listDirs, BOOL listFiles); // thread can be null, searchPath can contain wildcards. 
After use CLEO_StringListFree must be called on returned StringList to free its resources +LPCSTR WINAPI CLEO_GetGameDirectory(); // absolute game directory filepath without trailling path separator +LPCSTR WINAPI CLEO_GetUserDirectory(); // absolute game user files directory filepath without trailling path separator void WINAPI CLEO_Log(eLogLevel level, const char* msg); // add message to log diff --git a/cleo_sdk/CLEO_Utils.h b/cleo_sdk/CLEO_Utils.h index 35d0ca0f..ec373658 100644 --- a/cleo_sdk/CLEO_Utils.h +++ b/cleo_sdk/CLEO_Utils.h @@ -15,8 +15,10 @@ #include "CLEO.h" #include "CPools.h" // from GTA Plugin SDK #include "shellapi.h" // game window minimize/maximize support +#include #include #include +#include #include #include @@ -76,25 +78,11 @@ namespace CLEO OPCODE_WRITE_PARAM_PTR(value) // memory address */ - static const char* Gta_Root_Dir_Path = (char*)0x00B71AE0; - static const char* Gta_User_Dir_Path = (char*)0x00C92368; - static bool IsLegacyScript(CLEO::CRunningScript* thread) { return CLEO_GetScriptVersion(thread) < CLEO_VER_5; } - // this plugin's config file - static std::string GetConfigFilename() - { - std::string configFile = Gta_Root_Dir_Path; - if (!configFile.empty() && configFile.back() != '\\') configFile.push_back('\\'); - - configFile += "cleo\\cleo_plugins\\" TARGET_NAME ".ini"; - - return configFile; - } - static std::string StringPrintf(const char* format, ...) 
{ va_list args; @@ -112,6 +100,23 @@ namespace CLEO return result; } + static bool StringStartsWith(const std::string_view str, const std::string_view prefix, bool caseSensitive = true) + { + if (str.length() < prefix.length()) + { + return false; + } + + if (caseSensitive) + { + return strncmp(str.data(), prefix.data(), prefix.length()) == 0; + } + else + { + return _strnicmp(str.data(), prefix.data(), prefix.length()) == 0; + } + } + static std::string ScriptInfoStr(CLEO::CRunningScript* thread) { std::string info(1024, '\0'); @@ -119,27 +124,79 @@ namespace CLEO return std::move(info); } - // does file path points inside game directories? (game root or user files) - static bool IsFilepathSafe(CLEO::CRunningScript* thread, const char* path) + // Normalize filepath, collapse all parent directory references. Input should be absolute path without expandable %variables% + static void FilepathNormalize(std::string& path, bool normalizeCase = true) + { + if (path.empty()) return; + + std::replace(path.begin(), path.end(), '/', '\\'); + if (normalizeCase) std::transform(path.begin(), path.end(), path.begin(), [](unsigned char c) { return tolower(c); }); // to lower case + + // collapse references to parent directory + const auto ParentRef = "\\..\\"; + const auto ParentRefLen = 4; + + size_t refPos = path.find(ParentRef); + while (refPos != std::string::npos && refPos > 0) + { + size_t parentPos = path.rfind('\\', refPos - 1); // find start of parent name + + if (parentPos == std::string::npos) + return; // parent must be root of the path then. 
We want to keep absolute path, let it be as is (even if "C:\..\" makes no sense) + + path.replace(parentPos, (refPos - parentPos) + ParentRefLen - 1, ""); // remove parent and parent reference + + refPos = path.find(ParentRef); // find next + } + + while(path.back() == '\\') path.pop_back(); // remove trailing path separator(s) + } + + // strip parent prefix from filepath if present + static void FilepathRemoveParent(std::string& path, const std::string_view base) + { + if (path.length() < base.length()) return; // can not hold that prefix + if (!StringStartsWith(path, base, false)) return; + if (path.length() > base.length() && path[base.length()] != '\\') return; // just similar base + + path.replace(0, base.length() + 1, ""); // remove path separator too if present + } + + // this plugin's config file + static std::string GetConfigFilename() { - auto IsSubpath = [](std::filesystem::path path, std::filesystem::path base) + std::string path = CLEO_GetGameDirectory(); + path += "\\cleo\\cleo_plugins\\"; + path += TARGET_NAME; + path += ".ini"; + return path; + } + + // does normalized file path points inside game directories? 
(game root or user files) + static bool FilepathIsSafe(CLEO::CRunningScript* thread, const char* path) + { + if (strchr(path, '%') != nullptr) { - auto relative = std::filesystem::relative(path, base); - return !relative.empty() && *relative.begin() != ".."; - }; + return false; // do not allow paths containing expandable variables + } - auto fsPath = std::filesystem::path(path); - if (!fsPath.is_absolute()) + std::string absolute; + if (!std::filesystem::path(path).is_absolute()) { - fsPath = CLEO_GetScriptWorkDir(thread) / fsPath; + absolute = CLEO_GetScriptWorkDir(thread); + absolute += '\\'; + absolute += path; + FilepathNormalize(absolute, false); + path = absolute.c_str(); } - if (IsSubpath(fsPath, Gta_Root_Dir_Path) || IsSubpath(fsPath, Gta_User_Dir_Path)) + if (!StringStartsWith(path, CLEO_GetGameDirectory(), false) && + !StringStartsWith(path, CLEO_GetUserDirectory(), false)) { - return true; + return false; } - return false; + return true; } static bool IsObjectHandleValid(DWORD handle) @@ -604,7 +661,7 @@ namespace CLEO #define OPCODE_READ_PARAMS_FORMATTED(_format, _varName) char _varName[2 * MAX_STR_LEN + 1]; char* _varName##Ok = CLEO_ReadParamsFormatted(thread, _format, _varName, sizeof(_varName)); #define OPCODE_READ_PARAM_FILEPATH(_varName) char _buff_##_varName[512]; const char* ##_varName = _readParamText(thread, _buff_##_varName, 512); if(##_varName != nullptr) ##_varName = _buff_##_varName; if(_paramWasString()) CLEO_ResolvePath(thread, _buff_##_varName, 512); else return OpcodeResult::OR_INTERRUPT; \ - if(!IsFilepathSafe(thread, ##_varName)) { SHOW_ERROR("Forbidden file path '%s' outside game directories in script %s \nScript suspended.", ##_varName, ScriptInfoStr(thread).c_str()); return thread->Suspend(); } + if(!FilepathIsSafe(thread, ##_varName)) { SHOW_ERROR("Forbidden file path '%s' outside game directories in script %s \nScript suspended.", ##_varName, ScriptInfoStr(thread).c_str()); return thread->Suspend(); } #define 
OPCODE_READ_PARAM_PTR() _readParam(thread).pParam; \ if (!_paramWasInt()) { SHOW_ERROR("Input argument %s expected to be integer, got %s in script %s\nScript suspended.", GetParamInfo().c_str(), CLEO::ToKindStr(_lastParamType, _lastParamArrayType), CLEO::ScriptInfoStr(thread).c_str()); return thread->Suspend(); } \ diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 830f32d3..d8617a35 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -79,7 +79,6 @@ namespace CLEO void(__thiscall * ProcessScript)(CRunningScript*); - const char * (__cdecl * GetUserDirectory)(); void(__cdecl * ChangeToUserDir)(); void(__cdecl * ChangeToProgramDir)(const char *); @@ -215,7 +214,6 @@ namespace CLEO MemWrite(gvm.TranslateMemoryAddress(MA_OPCODE_HANDLER_REF), &customOpcodeHandlers); MemWrite(0x00469EF0, &customOpcodeHandlers); // TODO: game version translation - GetUserDirectory = gvm.TranslateMemoryAddress(MA_GET_USER_DIR_FUNCTION); ChangeToUserDir = gvm.TranslateMemoryAddress(MA_CHANGE_TO_USER_DIR_FUNCTION); ChangeToProgramDir = gvm.TranslateMemoryAddress(MA_CHANGE_TO_PROGRAM_DIR_FUNCTION); FindGroundZ = gvm.TranslateMemoryAddress(MA_FIND_GROUND_Z_FUNCTION); diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index fd450c7c..919c8e41 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -8,7 +8,6 @@ namespace CLEO { typedef OpcodeResult(__stdcall * CustomOpcodeHandler)(CRunningScript*); - extern const char* (__cdecl* GetUserDirectory)(); extern void(__cdecl* ChangeToUserDir)(); extern void(__cdecl* ChangeToProgramDir)(const char*); diff --git a/source/CGameVersionManager.cpp b/source/CGameVersionManager.cpp index e7da16de..2227cfaa 100644 --- a/source/CGameVersionManager.cpp +++ b/source/CGameVersionManager.cpp @@ -78,7 +78,6 @@ namespace CLEO { 0x00B74490, memory_und, 0x00B74490, 0x00B76B10, 0x00C01038 }, // MA_PED_POOL, { 0x00B74494, memory_und, 0x00B74494, 0x00B76B14, 
0x00C0103C }, // MA_VEHICLE_POOL, { 0x00B7449C, memory_und, 0x00B7449C, 0x00B76B18, 0x00C01044 }, // MA_OBJECT_POOL, - { 0x00744FB0, memory_und, 0x00744FB0, 0x007457E0, 0x0077EDC0 }, // MA_GET_USER_DIR_FUNCTION, { 0x00538860, memory_und, 0x00538860, 0x00538D00, 0x0054A730 }, // MA_CHANGE_TO_USER_DIR_FUNCTION, { 0x005387D0, memory_und, 0x005387D0, 0x00538C70, 0x0054A680 }, // MA_CHANGE_TO_PROGRAM_DIR_FUNCTION, { 0x00569660, memory_und, 0x00569660, 0x00569B00, 0x00583CB0 }, // MA_FIND_GROUND_Z_FUNCTION, diff --git a/source/CGameVersionManager.h b/source/CGameVersionManager.h index 34867776..9d356bc5 100644 --- a/source/CGameVersionManager.h +++ b/source/CGameVersionManager.h @@ -94,7 +94,6 @@ namespace CLEO MA_PED_POOL, MA_VEHICLE_POOL, MA_OBJECT_POOL, - MA_GET_USER_DIR_FUNCTION, MA_CHANGE_TO_USER_DIR_FUNCTION, MA_CHANGE_TO_PROGRAM_DIR_FUNCTION, MA_FIND_GROUND_Z_FUNCTION, diff --git a/source/CPluginSystem.cpp b/source/CPluginSystem.cpp index 1a6a6dcc..c18f6da6 100644 --- a/source/CPluginSystem.cpp +++ b/source/CPluginSystem.cpp @@ -68,14 +68,18 @@ void CPluginSystem::LoadPlugins() { for (auto it = paths.crbegin(); it != paths.crend(); it++) { - const auto filename = it->c_str(); + std::string filename = *it; + + // ModLoader support: keep game dir relative paths relative + FilepathRemoveParent(filename, Filepath_Game); + TRACE(""); // separator - TRACE("Loading plugin '%s'", filename); + TRACE("Loading plugin '%s'", filename.c_str()); - HMODULE hlib = LoadLibrary(filename); + HMODULE hlib = LoadLibrary(filename.c_str()); if (!hlib) { - LOG_WARNING(0, "Error loading plugin '%s'", filename); + LOG_WARNING(0, "Error loading plugin '%s'", filename.c_str()); continue; } diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 5fd105ff..666cfc1b 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -684,79 +684,61 @@ namespace CLEO return {}; } - try + auto fsPath = FS::path(path); + + // check for virtual path root + enum class VPref{ None, 
Game, User, Script, Cleo, Modules } virtualPrefix = VPref::None; + if(!fsPath.empty()) { - auto fsPath = FS::path(path); + const auto root = fsPath.begin()->string(); // first path element + const auto r = root.c_str(); - // check for virtual path root - enum class VPref{ None, Game, User, Script, Cleo, Modules } virtualPrefix = VPref::None; - auto root = fsPath.begin(); - if(root != fsPath.end()) - { - if(*root == DIR_GAME) virtualPrefix = VPref::Game; - else if (*root == DIR_USER) virtualPrefix = VPref::User; - else if (*root == DIR_SCRIPT) virtualPrefix = VPref::Script; - else if (*root == DIR_CLEO) virtualPrefix = VPref::Cleo; - else if (*root == DIR_MODULES) virtualPrefix = VPref::Modules; - } + if(_strcmpi(r, DIR_GAME) == 0) virtualPrefix = VPref::Game; + else if (_strcmpi(r, DIR_USER) == 0) virtualPrefix = VPref::User; + else if (_strcmpi(r, DIR_SCRIPT) == 0) virtualPrefix = VPref::Script; + else if (_strcmpi(r, DIR_CLEO) == 0) virtualPrefix = VPref::Cleo; + else if (_strcmpi(r, DIR_MODULES) == 0) virtualPrefix = VPref::Modules; + } - // not virtual - if(virtualPrefix == VPref::None) + // not virtual + if(virtualPrefix == VPref::None) + { + if(fsPath.is_relative()) { - if(fsPath.is_relative()) - { - if(customWorkDir != nullptr) - fsPath = ResolvePath(customWorkDir) / fsPath; - else - fsPath = GetWorkDir() / fsPath; - - auto resolved = FS::weakly_canonical(fsPath).string(); - - // ModLoader support: do not expand game dir relative paths - if (resolved.find(Filepath_Root) == 0) - return FS::relative(resolved, Filepath_Root).string(); - else - return resolved; - } - - return FS::weakly_canonical(fsPath).string(); + if(customWorkDir != nullptr) + fsPath = ResolvePath(customWorkDir) / fsPath; + else + fsPath = GetWorkDir() / fsPath; } - // expand virtual paths - FS::path resolved; + auto result = fsPath.string(); + FilepathNormalize(result, false); - if (virtualPrefix == VPref::User) // user files location - { - resolved = GetUserDirectory(); - } - else - if 
(virtualPrefix == VPref::Script) // this script's source file location - { - resolved = GetScriptFileDir(); - } - else - { - // all remaing variants starts with game root - resolved = Filepath_Root; - - switch(virtualPrefix) - { - case(VPref::Cleo): resolved /= "cleo"; break; - case(VPref::Modules): resolved /= "cleo\\cleo_modules"; break; - } - } + // ModLoader support: make paths withing game directory relative to it + FilepathRemoveParent(result, Filepath_Game); - // append all but virtual prefix from original path - for(auto it = ++fsPath.begin(); it != fsPath.end(); it++) - resolved /= *it; - - return FS::weakly_canonical(resolved).string(); // collapse "..\" uses + return std::move(result); } - catch (const std::exception& ex) + + // expand virtual paths + FS::path resolved; + switch(virtualPrefix) { - TRACE("Error while resolving path: %s", ex.what()); - return {}; + case VPref::User: resolved = Filepath_User; break; + case VPref::Script: resolved = GetScriptFileDir(); break; + case VPref::Game: resolved = Filepath_Game; break; + case VPref::Cleo: resolved = Filepath_Cleo; break; + case VPref::Modules: resolved = Filepath_Cleo + "\\modules"; break; + default : resolved = ""; break; // should never happen } + + // append all but virtual prefix from original path + for (auto it = ++fsPath.begin(); it != fsPath.end(); it++) + resolved /= *it; + + auto result = resolved.string(); + FilepathNormalize(result, false); + return std::move(result); } std::string CCustomScript::GetInfoStr(bool currLineInfo) const @@ -968,17 +950,17 @@ namespace CLEO if (CGame::bMissionPackGame == 0) // regular main game { - MainScriptFileDir = FS::path(Filepath_Root).append("data\\script").string(); + MainScriptFileDir = Filepath_Game + "\\data\\script"; MainScriptFileName = "main.scm"; } else // mission pack { - MainScriptFileDir = FS::path(GetUserDirectory()).append(stringPrintf("MPACK\\MPACK%d", CGame::bMissionPackGame)).string(); + MainScriptFileDir = Filepath_User + 
stringPrintf("\\MPACK\\MPACK%d", CGame::bMissionPackGame); MainScriptFileName = "scr.scm"; } NativeScriptsDebugMode = GetPrivateProfileInt("General", "DebugMode", 0, Filepath_Config.c_str()) != 0; - MainScriptCurWorkDir = Filepath_Root; + MainScriptCurWorkDir = Filepath_Game; GetInstance().ModuleSystem.LoadCleoModules(); LoadState(GetInstance().saveSlot); @@ -1596,7 +1578,7 @@ namespace CLEO else { bDebugMode = GetInstance().ScriptEngine.NativeScriptsDebugMode; // global setting - workDir = Filepath_Root; // game root + workDir = Filepath_Game; // game root } using std::ios; diff --git a/source/CleoBase.cpp b/source/CleoBase.cpp index 96bf2752..418b601c 100644 --- a/source/CleoBase.cpp +++ b/source/CleoBase.cpp @@ -116,12 +116,12 @@ namespace CLEO if (m_bStarted) return; // already started m_bStarted = true; - FS::create_directory(FS::path(Filepath_Root).append("cleo")); - FS::create_directory(FS::path(Filepath_Root).append("cleo\\cleo_modules")); - FS::create_directory(FS::path(Filepath_Root).append("cleo\\cleo_plugins")); - FS::create_directory(FS::path(Filepath_Root).append("cleo\\cleo_saves")); + FS::create_directory(Filepath_Cleo); + FS::create_directory(Filepath_Cleo + "\\cleo_modules"); + FS::create_directory(Filepath_Cleo + "\\cleo_plugins"); + FS::create_directory(Filepath_Cleo + "\\cleo_saves"); - OpcodeInfoDb.Load(FS::path(Filepath_Root).append("cleo\\.config\\sa.json").generic_string().c_str()); + OpcodeInfoDb.Load((Filepath_Cleo + "\\.config\\sa.json").c_str()); CodeInjector.OpenReadWriteAccess(); // must do this earlier to ensure plugins write access on init GameMenu.Inject(CodeInjector); @@ -276,14 +276,14 @@ namespace CLEO if (!listDirs && !listFiles) return {}; // nothing to list, done + // make absolute auto fsSearchPath = FS::path(searchPath); if (!fsSearchPath.is_absolute()) { - auto workDir = (thread != nullptr) ? 
- ((CCustomScript*)thread)->GetWorkDir() : - Filepath_Root.c_str(); - - fsSearchPath = workDir / fsSearchPath; + if (thread != nullptr) + fsSearchPath = ((CCustomScript*)thread)->GetWorkDir() / fsSearchPath; + else + fsSearchPath = Filepath_Game / fsSearchPath; } WIN32_FIND_DATA wfd = { 0 }; @@ -311,5 +311,15 @@ namespace CLEO return CreateStringList(found); } + + LPCSTR WINAPI CLEO_GetGameDirectory() + { + return Filepath_Game.c_str(); + } + + LPCSTR WINAPI CLEO_GetUserDirectory() + { + return Filepath_User.c_str(); + } } diff --git a/source/cleo.def b/source/cleo.def index 59112e0c..00da8e63 100644 --- a/source/cleo.def +++ b/source/cleo.def @@ -55,3 +55,5 @@ EXPORTS _CLEO_RegisterCommand@8 @52 _CLEO_IsScriptRunning@4 @53 _CLEO_TerminateScript@4 @54 + _CLEO_GetGameDirectory@0 @55 + _CLEO_GetUserDirectory@0 @56 diff --git a/source/stdafx.h b/source/stdafx.h index 96603147..266e9974 100644 --- a/source/stdafx.h +++ b/source/stdafx.h @@ -19,36 +19,57 @@ #include #include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "..\cleo_sdk\CLEO.h" +#include "..\cleo_sdk\CLEO_Utils.h" // global constant paths. 
Initialize before anything else namespace FS = std::filesystem; -static std::string GetApplicationDirectory() +static std::string GetGameDirectory() // already stored in Filepath_Game { - char buffer[512]; - GetModuleFileNameA(NULL, buffer, sizeof(buffer) - 1); // game exe absolute path - return FS::path(buffer).parent_path().string(); + static const auto GTA_GetCWD = (char* (__cdecl*)(char*, int))0x00836E91; // SA 1.0 US ingame function + + std::string path; + + path.resize(MAX_PATH); + GTA_GetCWD(path.data(), path.size()); // assume work dir is game location when initialized + path.resize(strlen(path.data())); + + CLEO::FilepathNormalize(path); + + return std::move(path); } -static const std::string Filepath_Root = GetApplicationDirectory(); -//static const std::string Filepath_Cleo = FS::path(Filepath_Root).append("cleo").string(); // absolute path -static const std::string Filepath_Cleo = "cleo"; // relative path - allow mod loaders to affect it +static std::string GetUserDirectory() // already stored in Filepath_User +{ + static const char* GTA_User_Dir_Path = (char*)0x00C92368; // SA 1.0 US + static const auto GTA_InitUserDirectories = (char* (__cdecl*)())0x00744FB0; // SA 1.0 US -static const std::string Filepath_Config = FS::path(Filepath_Cleo).append(".cleo_config.ini").string(); -static const std::string Filepath_Log = FS::path(Filepath_Cleo).append(".cleo.log").string(); + if (strlen(GTA_User_Dir_Path) == 0) + { + GTA_InitUserDirectories(); + } + std::string path = GTA_User_Dir_Path; + CLEO::FilepathNormalize(path); -#include -#include -#include -#include -#include -#include -#include + return std::move(path); +} -#include "..\cleo_sdk\CLEO.h" -#include "..\cleo_sdk\CLEO_Utils.h" -#include "CTheScripts.h" +inline const std::string Filepath_Game = GetGameDirectory(); +inline const std::string Filepath_User = GetUserDirectory(); +inline const std::string Filepath_Cleo = Filepath_Game + "\\cleo"; +inline const std::string Filepath_Config = Filepath_Cleo + 
"\\.cleo_config.ini"; +inline const std::string Filepath_Log = Filepath_Cleo + "\\.cleo.log"; #define NUM_SCAN_ENTITIES 16 From 203af741a4efc25240da237a7b1172dd98a31922 Mon Sep 17 00:00:00 2001 From: Miran Date: Wed, 18 Sep 2024 23:42:51 +0200 Subject: [PATCH 193/216] Removed unused path functions. --- source/CCustomOpcodeSystem.cpp | 5 ----- source/CCustomOpcodeSystem.h | 3 --- source/CGameVersionManager.cpp | 2 -- source/CGameVersionManager.h | 2 -- 4 files changed, 12 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index d8617a35..1419f1e5 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -79,9 +79,6 @@ namespace CLEO void(__thiscall * ProcessScript)(CRunningScript*); - void(__cdecl * ChangeToUserDir)(); - void(__cdecl * ChangeToProgramDir)(const char *); - float(__cdecl * FindGroundZ)(float x, float y); CMarker * RadarBlips; @@ -214,8 +211,6 @@ namespace CLEO MemWrite(gvm.TranslateMemoryAddress(MA_OPCODE_HANDLER_REF), &customOpcodeHandlers); MemWrite(0x00469EF0, &customOpcodeHandlers); // TODO: game version translation - ChangeToUserDir = gvm.TranslateMemoryAddress(MA_CHANGE_TO_USER_DIR_FUNCTION); - ChangeToProgramDir = gvm.TranslateMemoryAddress(MA_CHANGE_TO_PROGRAM_DIR_FUNCTION); FindGroundZ = gvm.TranslateMemoryAddress(MA_FIND_GROUND_Z_FUNCTION); GetPlayerPed = gvm.TranslateMemoryAddress(MA_GET_PLAYER_PED_FUNCTION); Handling = gvm.TranslateMemoryAddress(MA_HANDLING); diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index 919c8e41..cbfc9b0a 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -8,9 +8,6 @@ namespace CLEO { typedef OpcodeResult(__stdcall * CustomOpcodeHandler)(CRunningScript*); - extern void(__cdecl* ChangeToUserDir)(); - extern void(__cdecl* ChangeToProgramDir)(const char*); - class CCustomOpcodeSystem : public VInjectible { public: diff --git a/source/CGameVersionManager.cpp b/source/CGameVersionManager.cpp index 
2227cfaa..a1c1175a 100644 --- a/source/CGameVersionManager.cpp +++ b/source/CGameVersionManager.cpp @@ -78,8 +78,6 @@ namespace CLEO { 0x00B74490, memory_und, 0x00B74490, 0x00B76B10, 0x00C01038 }, // MA_PED_POOL, { 0x00B74494, memory_und, 0x00B74494, 0x00B76B14, 0x00C0103C }, // MA_VEHICLE_POOL, { 0x00B7449C, memory_und, 0x00B7449C, 0x00B76B18, 0x00C01044 }, // MA_OBJECT_POOL, - { 0x00538860, memory_und, 0x00538860, 0x00538D00, 0x0054A730 }, // MA_CHANGE_TO_USER_DIR_FUNCTION, - { 0x005387D0, memory_und, 0x005387D0, 0x00538C70, 0x0054A680 }, // MA_CHANGE_TO_PROGRAM_DIR_FUNCTION, { 0x00569660, memory_und, 0x00569660, 0x00569B00, 0x00583CB0 }, // MA_FIND_GROUND_Z_FUNCTION, { 0x00BA86F0, memory_und, 0x00BA86F0, 0x00BAAD70, 0x00C36020 }, // MA_RADAR_BLIPS, { 0x00C2B9C8, memory_und, 0x00C2B9C8, 0x00C2E188, 0x00CAC1E0 }, // MA_HANDLING, diff --git a/source/CGameVersionManager.h b/source/CGameVersionManager.h index 9d356bc5..e170ca41 100644 --- a/source/CGameVersionManager.h +++ b/source/CGameVersionManager.h @@ -94,8 +94,6 @@ namespace CLEO MA_PED_POOL, MA_VEHICLE_POOL, MA_OBJECT_POOL, - MA_CHANGE_TO_USER_DIR_FUNCTION, - MA_CHANGE_TO_PROGRAM_DIR_FUNCTION, MA_FIND_GROUND_Z_FUNCTION, MA_RADAR_BLIPS, MA_HANDLING, From b49b065a7433296a9fde74661ad2d1d1a863ee54 Mon Sep 17 00:00:00 2001 From: Miran Date: Mon, 16 Sep 2024 08:38:32 +0200 Subject: [PATCH 194/216] Removed unused Handling --- source/CCustomOpcodeSystem.cpp | 3 --- source/CGameVersionManager.cpp | 1 - source/CGameVersionManager.h | 1 - 3 files changed, 5 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 1419f1e5..50218258 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -82,8 +82,6 @@ namespace CLEO float(__cdecl * FindGroundZ)(float x, float y); CMarker * RadarBlips; - CHandling * Handling; - CPlayerPed * (__cdecl * GetPlayerPed)(DWORD); void(__cdecl * SpawnCar)(DWORD); @@ -213,7 +211,6 @@ namespace CLEO FindGroundZ = 
gvm.TranslateMemoryAddress(MA_FIND_GROUND_Z_FUNCTION); GetPlayerPed = gvm.TranslateMemoryAddress(MA_GET_PLAYER_PED_FUNCTION); - Handling = gvm.TranslateMemoryAddress(MA_HANDLING); SpawnCar = gvm.TranslateMemoryAddress(MA_SPAWN_CAR_FUNCTION); // TODO: consider version-agnostic code diff --git a/source/CGameVersionManager.cpp b/source/CGameVersionManager.cpp index a1c1175a..ec46b0e2 100644 --- a/source/CGameVersionManager.cpp +++ b/source/CGameVersionManager.cpp @@ -80,7 +80,6 @@ namespace CLEO { 0x00B7449C, memory_und, 0x00B7449C, 0x00B76B18, 0x00C01044 }, // MA_OBJECT_POOL, { 0x00569660, memory_und, 0x00569660, 0x00569B00, 0x00583CB0 }, // MA_FIND_GROUND_Z_FUNCTION, { 0x00BA86F0, memory_und, 0x00BA86F0, 0x00BAAD70, 0x00C36020 }, // MA_RADAR_BLIPS, - { 0x00C2B9C8, memory_und, 0x00C2B9C8, 0x00C2E188, 0x00CAC1E0 }, // MA_HANDLING, { 0x0056E210, memory_und, 0x0056E210, 0x0056E6B0, 0x00563900 }, // MA_GET_PLAYER_PED_FUNCTION, { 0x0043A0B0, memory_und, 0x0043A0B0, 0x0043A136, 0x0043D3D0 }, // MA_SPAWN_CAR_FUNCTION, diff --git a/source/CGameVersionManager.h b/source/CGameVersionManager.h index e170ca41..52383174 100644 --- a/source/CGameVersionManager.h +++ b/source/CGameVersionManager.h @@ -96,7 +96,6 @@ namespace CLEO MA_OBJECT_POOL, MA_FIND_GROUND_Z_FUNCTION, MA_RADAR_BLIPS, - MA_HANDLING, MA_GET_PLAYER_PED_FUNCTION, MA_SPAWN_CAR_FUNCTION, From 0647764c47783e81bbe563953a64112c883eb843 Mon Sep 17 00:00:00 2001 From: Miran Date: Mon, 16 Sep 2024 09:35:46 +0200 Subject: [PATCH 195/216] Use existing opcode system instead manually hooking 004E opcode. 
--- source/CCustomOpcodeSystem.cpp | 8 ++++++++ source/CCustomOpcodeSystem.h | 1 + source/CGameVersionManager.cpp | 1 - source/CGameVersionManager.h | 1 - source/CScriptEngine.cpp | 23 ----------------------- 5 files changed, 9 insertions(+), 25 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 50218258..c8b2b085 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -23,6 +23,7 @@ namespace CLEO template inline CRunningScript& operator>>(CRunningScript& thread, memory_pointer& pval); + OpcodeResult __stdcall opcode_004E(CRunningScript* thread); // terminate_this_script OpcodeResult __stdcall opcode_0051(CRunningScript * thread); // GOSUB return OpcodeResult __stdcall opcode_0417(CRunningScript* thread); // load_and_launch_mission_internal @@ -232,6 +233,7 @@ namespace CLEO TRACE(""); // separator TRACE("Initializing CLEO core opcodes..."); + CLEO_RegisterOpcode(0x004E, opcode_004E); CLEO_RegisterOpcode(0x0051, opcode_0051); CLEO_RegisterOpcode(0x0417, opcode_0417); CLEO_RegisterOpcode(0x0A92, opcode_0A92); @@ -797,6 +799,12 @@ namespace CLEO /* Opcode definitions */ /************************************************************************/ + OpcodeResult __stdcall CCustomOpcodeSystem::opcode_004E(CRunningScript* thread) + { + GetInstance().ScriptEngine.RemoveScript(thread); + return OR_INTERRUPT; + } + OpcodeResult __stdcall CCustomOpcodeSystem::opcode_0051(CRunningScript* thread) // GOSUB return { if (thread->SP == 0 && !IsLegacyScript(thread)) // CLEO5 - allow use of GOSUB `return` to exit cleo calls too diff --git a/source/CCustomOpcodeSystem.h b/source/CCustomOpcodeSystem.h index cbfc9b0a..6bd50410 100644 --- a/source/CCustomOpcodeSystem.h +++ b/source/CCustomOpcodeSystem.h @@ -44,6 +44,7 @@ namespace CLEO static OpcodeResult CleoReturnGeneric(WORD opcode, CRunningScript* thread, bool returnArgs = false, DWORD returnArgCount = 0, bool strictArgCount = true); + static OpcodeResult 
__stdcall opcode_004E(CRunningScript* thread); // terminate_this_script static OpcodeResult __stdcall opcode_0051(CRunningScript* thread); // GOSUB's return static OpcodeResult __stdcall opcode_0417(CRunningScript* thread); // load_and_launch_mission_internal diff --git a/source/CGameVersionManager.cpp b/source/CGameVersionManager.cpp index ec46b0e2..56d253a0 100644 --- a/source/CGameVersionManager.cpp +++ b/source/CGameVersionManager.cpp @@ -54,7 +54,6 @@ namespace CLEO { 0x005D4FD7, memory_und, 0x005D4FD7, 0x005D57B7, 0x005F1777 }, // MA_CALL_INIT_SCM3, { 0x005D14D5, memory_und, 0x005D14D5, 0x005D157C, 0x005EDBD4 }, // MA_CALL_SAVE_SCM_DATA, { 0x005D18F0, memory_und, 0x005D18F0, 0x005D20D0, 0x005EE017 }, // MA_CALL_LOAD_SCM_DATA, - { 0x004667DB, memory_und, 0x004667DB, 0x0046685B, 0x0046BEFD }, // MA_OPCODE_004E, { 0x0046A21B, memory_und, 0x0046A21B, 0x0046AE9B, 0x0046F9A8 }, // MA_CALL_PROCESS_SCRIPT { 0x00A94B68, memory_und, 0x00A94B68, 0x00A971E8, 0x00B09C80 }, // MA_SCRIPT_SPRITE_ARRAY { 0x00464980, memory_und, 0x00464980, 0x00465600, 0x0046A130 }, // MA_DRAW_SCRIPT_SPRITES diff --git a/source/CGameVersionManager.h b/source/CGameVersionManager.h index 52383174..f7f19738 100644 --- a/source/CGameVersionManager.h +++ b/source/CGameVersionManager.h @@ -70,7 +70,6 @@ namespace CLEO MA_CALL_INIT_SCM3, MA_CALL_SAVE_SCM_DATA, MA_CALL_LOAD_SCM_DATA, - MA_OPCODE_004E, MA_CALL_PROCESS_SCRIPT, MA_SCRIPT_SPRITE_ARRAY, MA_DRAW_SCRIPT_SPRITES, diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 666cfc1b..7497daba 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -310,28 +310,6 @@ namespace CLEO CRunningScript **inactiveThreadQueue, **activeThreadQueue; - - extern "C" void __stdcall opcode_004E(CCustomScript *pScript) // terminate_this_script - { - GetInstance().ScriptEngine.RemoveScript(pScript); - } - - extern "C" void __declspec(naked) opcode_004E_hook(void) - { - __asm - { - push esi - call opcode_004E - pop edi - mov al, 1 - pop 
esi - mov ecx, [esp + 0x14] - mov fs : 0, ecx - add esp, 32 - ret 0x4 - } - } - void OnLoadScmData(void) { TRACE("Loading scripts save data..."); @@ -928,7 +906,6 @@ namespace CLEO inj.ReplaceFunction(OnLoadScmData, gvm.TranslateMemoryAddress(MA_CALL_LOAD_SCM_DATA)); inj.ReplaceFunction(OnSaveScmData, gvm.TranslateMemoryAddress(MA_CALL_SAVE_SCM_DATA)); - inj.InjectFunction(&opcode_004E_hook, gvm.TranslateMemoryAddress(MA_OPCODE_004E)); } CScriptEngine::~CScriptEngine() From 515ad45b6566d6f36bc700f7e95da4ef30d0a067 Mon Sep 17 00:00:00 2001 From: Miran Date: Thu, 19 Sep 2024 00:24:22 +0200 Subject: [PATCH 196/216] CLEO log improvements --- source/CCustomOpcodeSystem.cpp | 12 ++++++------ source/CPluginSystem.cpp | 2 +- source/CScriptEngine.cpp | 28 ++++++++++++++-------------- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index c8b2b085..2eee2e41 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -836,7 +836,7 @@ namespace CLEO OPCODE_READ_PARAM_STRING(path); auto filename = reinterpret_cast(thread)->ResolvePath(path, DIR_CLEO); // legacy: default search location is game\cleo directory - TRACE("[0A92] Starting new custom script %s from thread named %s", filename.c_str(), thread->GetName().c_str()); + TRACE("[0A92] Starting new custom script %s from thread named '%s'", filename.c_str(), thread->GetName().c_str()); auto cs = new CCustomScript(filename.c_str(), false, thread); SetScriptCondResult(thread, cs && cs->IsOK()); @@ -862,7 +862,7 @@ namespace CLEO CCustomScript *cs = reinterpret_cast(thread); if (thread->IsMission() || !cs->IsCustom()) { - LOG_WARNING(0, "Incorrect usage of opcode [0A93] in script %s. Use [004E] instead.", ((CCustomScript*)thread)->GetInfoStr().c_str()); + LOG_WARNING(0, "Incorrect usage of opcode [0A93] in script '%s'. 
Use [004E] instead.", ((CCustomScript*)thread)->GetInfoStr().c_str()); return OR_CONTINUE; // legacy behavior } @@ -877,7 +877,7 @@ namespace CLEO auto filename = reinterpret_cast(thread)->ResolvePath(path, DIR_CLEO); // legacy: default search location is game\cleo directory filename += ".cm"; // add custom mission extension - TRACE("[0A94] Starting new custom mission %s from thread named %s", filename.c_str(), thread->GetName().c_str()); + TRACE("[0A94] Starting new custom mission '%s' from thread named '%s'", filename.c_str(), thread->GetName().c_str()); auto cs = new CCustomScript(filename.c_str(), true, thread); SetScriptCondResult(thread, cs && cs->IsOK()); @@ -1834,11 +1834,11 @@ extern "C" if (label != 0) // create from label { - TRACE("Starting new custom script from thread named %s label %i", filename.c_str(), label); + TRACE("Starting new custom script from thread named '%s' label %i", filename.c_str(), label); } else { - TRACE("Starting new custom script %s", filename.c_str()); + TRACE("Starting new custom script '%s'", filename.c_str()); } // if "label == 0" then "script_name" need to be the file name @@ -1857,7 +1857,7 @@ extern "C" { if (cs) delete cs; if (fromThread) SkipUnusedVarArgs(fromThread); - LOG_WARNING(0, "Failed to load script '%s'.", filename.c_str()); + LOG_WARNING(0, "Failed to load script '%s'", filename.c_str()); return nullptr; } diff --git a/source/CPluginSystem.cpp b/source/CPluginSystem.cpp index c18f6da6..5a59e1e9 100644 --- a/source/CPluginSystem.cpp +++ b/source/CPluginSystem.cpp @@ -50,7 +50,7 @@ void CPluginSystem::LoadPlugins() else { skippedPaths.emplace(files.strings[i]); - LOG_WARNING(0, " - '%s' skipped, duplicate of `%s` plugin", files.strings[i], name.c_str()); + LOG_WARNING(0, " - '%s' skipped, duplicate of '%s' plugin", files.strings[i], name.c_str()); } } diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index 7497daba..ee62f041 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ 
-1020,7 +1020,7 @@ namespace CLEO if (!cs || !cs->bOK) { - TRACE("Loading of custom script %s failed", szFilePath); + TRACE("Loading of custom script '%s' failed", szFilePath); if (cs) delete cs; return nullptr; } @@ -1034,7 +1034,7 @@ namespace CLEO { if (stopped_info[i] == cs->dwChecksum) { - TRACE("Custom script %s found in the stop-list", szFilePath); + TRACE("Custom script '%s' found in the stop-list", szFilePath); InactiveScriptHashes.insert(stopped_info[i]); delete cs; return nullptr; @@ -1049,7 +1049,7 @@ namespace CLEO { if (safe_info[i].hash == cs->dwChecksum) { - TRACE("Custom script %s found in the safe-list", szFilePath); + TRACE("Custom script '%s' found in the safe-list", szFilePath); safe_info[i].Apply(cs); break; } @@ -1075,7 +1075,7 @@ namespace CLEO // load cleo saving file try { - TRACE("Loading cleo safe %s", saveFile.c_str()); + TRACE("Loading cleo safe '%s'", saveFile.c_str()); std::ifstream ss(saveFile.c_str(), std::ios::binary); if (ss.is_open()) { @@ -1102,7 +1102,7 @@ namespace CLEO } catch (std::exception& ex) { - TRACE("Loading of cleo safe %s failed: %s", saveFile.c_str(), ex.what()); + TRACE("Loading of cleo safe '%s' failed: %s", saveFile.c_str(), ex.what()); safe_header.n_saved_threads = safe_header.n_stopped_threads = 0; memset(CleoVariables, 0, sizeof(CleoVariables)); } @@ -1126,7 +1126,7 @@ namespace CLEO char safe_name[MAX_PATH]; sprintf(safe_name, "./cleo/cleo_saves/cs%d.sav", nSlot); - TRACE("Saving script engine state to the file %s", safe_name); + TRACE("Saving script engine state to the file '%s'", safe_name); CreateDirectory("cleo", NULL); CreateDirectory("cleo/cleo_saves", NULL); @@ -1306,12 +1306,12 @@ namespace CLEO { if (cs->IsMission()) { - TRACE("Registering custom mission named %s", cs->GetName().c_str()); + TRACE("Registering custom mission named '%s'", cs->GetName().c_str()); CustomMission = cs; } else { - TRACE("Registering custom script named %s", cs->GetName().c_str()); + TRACE("Registering custom script named 
'%s'", cs->GetName().c_str()); CustomScripts.push_back(cs); } AddScriptToQueue(cs, activeThreadQueue); @@ -1360,7 +1360,7 @@ namespace CLEO } if (cs == CustomMission) { - TRACE("Unregistering custom mission named %s", cs->GetName().c_str()); + TRACE("Unregistering custom mission named '%s'", cs->GetName().c_str()); RemoveScriptFromQueue(CustomMission, activeThreadQueue); ScriptsWaitingForDelete.push_back(cs); CustomMission->SetActive(false); @@ -1372,11 +1372,11 @@ namespace CLEO if (cs->bSaveEnabled) { InactiveScriptHashes.insert(cs->dwChecksum); - TRACE("Stopping custom script named %s", cs->GetName().c_str()); + TRACE("Stopping custom script named '%s'", cs->GetName().c_str()); } else { - TRACE("Unregistering custom script named %s", cs->GetName().c_str()); + TRACE("Unregistering custom script named '%s'", cs->GetName().c_str()); ScriptsWaitingForDelete.push_back(cs); } @@ -1404,14 +1404,14 @@ namespace CLEO std::for_each(ScriptsWaitingForDelete.begin(), ScriptsWaitingForDelete.end(), [this](CCustomScript *cs) { - TRACE("Deleting inactive script named %s", cs->GetName().c_str()); + TRACE("Deleting inactive script named '%s'", cs->GetName().c_str()); delete cs; }); ScriptsWaitingForDelete.clear(); if (CustomMission) { - TRACE("Unregistering custom mission named %s", CustomMission->GetName().c_str()); + TRACE("Unregistering custom mission named '%s'", CustomMission->GetName().c_str()); RemoveScriptFromQueue(CustomMission, activeThreadQueue); CustomMission->SetActive(false); delete CustomMission; @@ -1605,7 +1605,7 @@ namespace CLEO } catch (...) { - LOG_WARNING(0, "Unknown error during loading of custom script %s occured.", szFileName); + LOG_WARNING(0, "Unknown error during loading of custom script '%s' occured.", szFileName); } } From c81cd9792ab761f42f992adecc572f6ec00a61f5 Mon Sep 17 00:00:00 2001 From: Miran Date: Thu, 19 Sep 2024 05:53:30 +0200 Subject: [PATCH 197/216] Added debug prints colors and font style to config file. 
Increased debug font size and proportions. --- cleo_plugins/DebugUtils/SA.DebugUtils.ini | 24 ++++++++++++++---- cleo_plugins/DebugUtils/ScreenLog.cpp | 30 +++++++++++++++++++---- cleo_plugins/DebugUtils/ScreenLog.h | 8 +++--- 3 files changed, 49 insertions(+), 13 deletions(-) diff --git a/cleo_plugins/DebugUtils/SA.DebugUtils.ini b/cleo_plugins/DebugUtils/SA.DebugUtils.ini index 829c7ef3..189b0d9e 100644 --- a/cleo_plugins/DebugUtils/SA.DebugUtils.ini +++ b/cleo_plugins/DebugUtils/SA.DebugUtils.ini @@ -1,10 +1,24 @@ [General] ; Opcodes 0662, 0663, 0664: 0 - off, 1 - enabled -LegacyDebugOpcodes=0 +LegacyDebugOpcodes = 0 [ScreenLog] ; Level: 0 - off, 1 - errors and warnings, 2 - debug messages, 3 - all -Level=2 -MessageTime=3000 -MessagesMax=45 -FontSize=45 +Level = 2 + +; Minimum display time of single message +MessageTime = 3000 + +; Maximum count of visible messages +MessagesMax = 32 + +; Font size +FontSize = 56 + +; Font style: 0 - gothic, 1 - subtitles, 2 - menu, 3 - pricedown +FontStyle = 1 + +; Messages colors: hexadecimal RGBA +ColorError = "FF30EEFF" +ColorDebug = "FFEE30FF" +ColorSystem = "DDDDDDFF" diff --git a/cleo_plugins/DebugUtils/ScreenLog.cpp b/cleo_plugins/DebugUtils/ScreenLog.cpp index 6c9151e8..159fab9b 100644 --- a/cleo_plugins/DebugUtils/ScreenLog.cpp +++ b/cleo_plugins/DebugUtils/ScreenLog.cpp @@ -1,6 +1,5 @@ #include "ScreenLog.h" #include "CLEO_Utils.h" -#include "CFont.h" #include "CTimer.h" DWORD ScreenLog::timeDisplay = 1000; @@ -13,14 +12,35 @@ ScreenLog::ScreenLog() void ScreenLog::Init() { + auto ConfigReadHex = [](const char* section, const char* key, DWORD defValue, const char* filename) + { + char buff[32] = { 0 }; + if (!GetPrivateProfileString(section, key, "", buff, sizeof(buff), filename)) + return defValue; + + char* end; + DWORD result = strtoul(buff, &end, 16); + + if (*end != '\0') + return defValue; // any invalid char + + return result; + }; + // load settings from ini file auto config = GetConfigFilename(); - level = 
(eLogLevel)GetPrivateProfileInt("ScreenLog", "Level", (UINT)eLogLevel::None, config.c_str()); + level = (eLogLevel)GetPrivateProfileInt("ScreenLog", "Level", (DWORD)eLogLevel::None, config.c_str()); maxMessages = GetPrivateProfileInt("ScreenLog", "MessagesMax", 40, config.c_str()); timeDisplay = GetPrivateProfileInt("ScreenLog", "MessageTime", 6000, config.c_str()); timeFadeout = 3000; + fontSize = 0.01f * GetPrivateProfileInt("ScreenLog", "FontSize", 60, config.c_str()); + fontStyle = (eFontStyle)GetPrivateProfileInt("ScreenLog", "FontStyle", eFontStyle::FONT_SUBTITLES, config.c_str()); + + fontColor[(size_t)eLogLevel::Error] = CRGBA(ConfigReadHex("ScreenLog", "ColorError", fontColor[(size_t)eLogLevel::Error].ToInt(), config.c_str())); + fontColor[(size_t)eLogLevel::Debug] = CRGBA(ConfigReadHex("ScreenLog", "ColorDebug", fontColor[(size_t)eLogLevel::Debug].ToInt(), config.c_str())); + fontColor[(size_t)eLogLevel::Default] = CRGBA(ConfigReadHex("ScreenLog", "ColorSystem", fontColor[(size_t)eLogLevel::Default].ToInt(), config.c_str())); } void ScreenLog::Add(eLogLevel level, const char* msg) @@ -84,12 +104,12 @@ void ScreenLog::Draw() CFont::SetBackground(false, false); CFont::SetWrapx(99999999.0f); // no line wrap - CFont::SetFontStyle(FONT_SUBTITLES); + CFont::SetFontStyle(fontStyle); CFont::SetEdge(1); CFont::SetProportional(true); const float aspect = (float)RsGlobal.maximumWidth / RsGlobal.maximumHeight; - float sizeX = fontSize * 0.55f * RsGlobal.maximumWidth / 640.0f / aspect; + float sizeX = fontSize * 0.58f * RsGlobal.maximumWidth / 640.0f / aspect; float sizeY = fontSize * RsGlobal.maximumHeight / 448.0f; CFont::SetScale(sizeX, sizeY); @@ -159,7 +179,7 @@ void ScreenLog::DrawLine(const char* msg, size_t row) { CFont::SetBackground(false, false); CFont::SetWrapx(99999999.0f); // no line wrap - CFont::SetFontStyle(FONT_SUBTITLES); + CFont::SetFontStyle(fontStyle); CFont::SetEdge(1); CFont::SetProportional(true); diff --git 
a/cleo_plugins/DebugUtils/ScreenLog.h b/cleo_plugins/DebugUtils/ScreenLog.h index a487b783..d4c193bc 100644 --- a/cleo_plugins/DebugUtils/ScreenLog.h +++ b/cleo_plugins/DebugUtils/ScreenLog.h @@ -1,5 +1,6 @@ #pragma once #include "CLEO.h" +#include "CFont.h" #include "CRGBA.h" #include #include @@ -22,11 +23,12 @@ class ScreenLog eLogLevel level; size_t maxMessages; float fontSize; + eFontStyle fontStyle; DWORD timeFadeout; // miliseconds - const CRGBA fontColor[4] = { // colors for eLogLevel + CRGBA fontColor[4] = { // colors for eLogLevel CRGBA(0xDD, 0xDD, 0xDD, 0xFF), // None - CRGBA(0xFF, 0x30, 0x30, 0xFF), // Error + CRGBA(0xFF, 0x30, 0xFF, 0xFF), // Error CRGBA(0xFF, 0xEE, 0x30, 0xFF), // User CRGBA(0xDD, 0xDD, 0xDD, 0xFF), // Default }; @@ -106,7 +108,7 @@ class ScreenLog void ResetTime() { - timeLeft = min(msg.length(), 200) * 0.06f; // 16 letters peer second reading speed + timeLeft = min(msg.length(), 200) * 0.055f; // 18 letters peer second reading speed timeLeft = max(timeLeft, 0.001f * ScreenLog::timeDisplay); // not shorter than defined in config } From 244207fb85bbedc44f0ea00a7cd49781cfeffcd9 Mon Sep 17 00:00:00 2001 From: Miran Date: Thu, 19 Sep 2024 06:03:21 +0200 Subject: [PATCH 198/216] fixup! Added debug prints colors and font style to config file. 
--- cleo_plugins/DebugUtils/SA.DebugUtils.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cleo_plugins/DebugUtils/SA.DebugUtils.ini b/cleo_plugins/DebugUtils/SA.DebugUtils.ini index 189b0d9e..03a654fe 100644 --- a/cleo_plugins/DebugUtils/SA.DebugUtils.ini +++ b/cleo_plugins/DebugUtils/SA.DebugUtils.ini @@ -9,7 +9,7 @@ Level = 2 ; Minimum display time of single message MessageTime = 3000 -; Maximum count of visible messages +; Maximum count of messages visible at the same time (~row count) MessagesMax = 32 ; Font size From ba6d9427c81ab845b7e96cf26ac8d1c1f2f96f8c Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sat, 21 Sep 2024 06:18:04 +0200 Subject: [PATCH 199/216] Updated OnDrawingFinished hook to not be overwritten by other mods. (#201) * Updated OnDrawingFinished hook to not be overwritten by other mods. * Added extra error checking. * Replaced m_bStarted and m_bLateStarted with single variable. * fixup! Replaced m_bStarted and m_bLateStarted with single variable. * Switched hooked function to work with SkyGFX * fixup! Switched hooked function to work with SkyGFX * fixup! Switched hooked function to work with SkyGFX * Simplified DebugDisplayTextBuffer hooking. * fixup! Simplified DebugDisplayTextBuffer hooking. * fixup! Simplified DebugDisplayTextBuffer hooking. * fixup! Simplified DebugDisplayTextBuffer hooking. * fixup! Simplified DebugDisplayTextBuffer hooking. * Handled JMPSHORT hooking. * fixup! Handled JMPSHORT hooking. 
--- source/CCodeInjector.h | 22 +++++--- source/CGameMenu.cpp | 2 + source/CGameVersionManager.cpp | 1 + source/CGameVersionManager.h | 1 + source/CleoBase.cpp | 97 ++++++++++++++++++++-------------- source/CleoBase.h | 37 ++++++++----- source/Mem.h | 30 +++++++++-- source/dllmain.cpp | 2 +- 8 files changed, 130 insertions(+), 62 deletions(-) diff --git a/source/CCodeInjector.h b/source/CCodeInjector.h index 8f0eccb2..0cafb842 100644 --- a/source/CCodeInjector.h +++ b/source/CCodeInjector.h @@ -51,17 +51,27 @@ namespace CLEO void CloseReadWriteAccess(); template - void ReplaceFunction(T *funcPtr, memory_pointer Position, T** origFuncPtr = nullptr) + void ReplaceFunction(T *funcPtr, memory_pointer position, T** origFuncPtr = nullptr) { - TRACE("Replacing call: 0x%08X", (DWORD)Position); - MemCall((size_t)Position, (size_t)funcPtr, (size_t*)origFuncPtr); // *whistle* + MemCall((size_t)position, (size_t)funcPtr, (size_t*)origFuncPtr); + + if (origFuncPtr == nullptr) { TRACE("Replaced call at: 0x%08X", (DWORD)position); } + else { TRACE("Replaced call at: 0x%08X, original function was: 0x%08X", (DWORD)position, (DWORD)*origFuncPtr); } + } + + void ReplaceJump(memory_pointer newJumpDst, memory_pointer position, memory_pointer* origJumpDest = nullptr) + { + MemJump((size_t)position, (size_t)newJumpDst, (size_t*)origJumpDest); + + if (origJumpDest == nullptr) { TRACE("Replaced jump at: 0x%08X", (DWORD)position); } + else { TRACE("Replaced jump at: 0x%08X, original destination was: 0x%08X", (DWORD)position, (DWORD)origJumpDest->address); } } template - void InjectFunction(T *funcPtr, memory_pointer Position) + void InjectFunction(T *funcPtr, memory_pointer position) { - TRACE("Injecting function at: 0x%08X", (DWORD)Position); - MemJump((size_t)Position, (size_t)funcPtr); + TRACE("Injecting function at: 0x%08X", (DWORD)position); + MemJump((size_t)position, (size_t)funcPtr); } void Nop(memory_pointer addr, size_t size) diff --git a/source/CGameMenu.cpp 
b/source/CGameMenu.cpp index 585dd49e..dbc2666b 100644 --- a/source/CGameMenu.cpp +++ b/source/CGameMenu.cpp @@ -24,6 +24,8 @@ namespace CLEO void __fastcall OnDrawMenuBackground(void *texture, int dummy, RwRect2D *rect, RwRGBA *color) { + GetInstance().Start(CCleoInstance::InitStage::OnDraw); // late initialization + CTexture_DrawInRect(texture, rect, color); // call original CFont::SetBackground(false, false); diff --git a/source/CGameVersionManager.cpp b/source/CGameVersionManager.cpp index 56d253a0..cc34cfc1 100644 --- a/source/CGameVersionManager.cpp +++ b/source/CGameVersionManager.cpp @@ -70,6 +70,7 @@ namespace CLEO { 0x0053C758, memory_und, memory_und, memory_und, memory_und }, // MA_CALL_GAME_RESTART_1 TODO: find for other versions { 0x00748E04, memory_und, memory_und, memory_und, memory_und }, // MA_CALL_GAME_RESTART_2 TODO: find for other versions { 0x00748E3E, memory_und, memory_und, memory_und, memory_und }, // MA_CALL_GAME_RESTART_3 TODO: find for other versions + { 0x00532260, memory_und, memory_und, memory_und, memory_und }, // MA_DEBUG_DISPLAY_TEXT_BUFFER TODO: find for other versions // GV_US10, GV_US11, GV_EU10, GV_EU11, GV_STEAM { 0x008A6168, memory_und, 0x008A6168, 0x008A7450, 0x00913C20 }, // MA_OPCODE_HANDLER, diff --git a/source/CGameVersionManager.h b/source/CGameVersionManager.h index f7f19738..bce0d789 100644 --- a/source/CGameVersionManager.h +++ b/source/CGameVersionManager.h @@ -86,6 +86,7 @@ namespace CLEO MA_CALL_GAME_RESTART_1, MA_CALL_GAME_RESTART_2, MA_CALL_GAME_RESTART_3, + MA_DEBUG_DISPLAY_TEXT_BUFFER, // empty function called after everything else is drawn // CustomOpcodeSystem MA_OPCODE_HANDLER, diff --git a/source/CleoBase.cpp b/source/CleoBase.cpp index 418b601c..31637e39 100644 --- a/source/CleoBase.cpp +++ b/source/CleoBase.cpp @@ -7,11 +7,6 @@ namespace CLEO CCleoInstance CleoInstance; CCleoInstance& GetInstance() { return CleoInstance; } - inline CCleoInstance::CCleoInstance() - { - m_bStarted = false; - } - inline 
CCleoInstance::~CCleoInstance() { Stop(); @@ -111,52 +106,79 @@ namespace CLEO _asm jmp oriFunc } - void CCleoInstance::Start() + void __declspec(naked) CCleoInstance::OnDebugDisplayTextBuffer() + { + GetInstance().CallCallbacks(eCallbackId::DrawingFinished); // execute registered callbacks + static DWORD oriFunc; + oriFunc = (DWORD)(GetInstance().DebugDisplayTextBuffer); + if (oriFunc != (DWORD)nullptr) + _asm jmp oriFunc + else + _asm ret + } + + void CCleoInstance::Start(InitStage stage) { - if (m_bStarted) return; // already started - m_bStarted = true; + if (stage > InitStage::Done) return; // invalid argument + + auto nextStage = InitStage(m_initStage + 1); + if (stage != nextStage) return; - FS::create_directory(Filepath_Cleo); - FS::create_directory(Filepath_Cleo + "\\cleo_modules"); - FS::create_directory(Filepath_Cleo + "\\cleo_plugins"); - FS::create_directory(Filepath_Cleo + "\\cleo_saves"); + if (stage == InitStage::Initial) + { + TRACE("CLEO initialization: Phase 1"); - OpcodeInfoDb.Load((Filepath_Cleo + "\\.config\\sa.json").c_str()); + FS::create_directory(Filepath_Cleo); + FS::create_directory(Filepath_Cleo + "\\cleo_modules"); + FS::create_directory(Filepath_Cleo + "\\cleo_plugins"); + FS::create_directory(Filepath_Cleo + "\\cleo_saves"); - CodeInjector.OpenReadWriteAccess(); // must do this earlier to ensure plugins write access on init - GameMenu.Inject(CodeInjector); - DmaFix.Inject(CodeInjector); - OpcodeSystem.Inject(CodeInjector); - ScriptEngine.Inject(CodeInjector); + OpcodeInfoDb.Load((Filepath_Cleo + "\\.config\\sa.json").c_str()); - CodeInjector.ReplaceFunction(OnCreateMainWnd, VersionManager.TranslateMemoryAddress(MA_CALL_CREATE_MAIN_WINDOW), &CreateMainWnd_Orig); + CodeInjector.OpenReadWriteAccess(); // must do this earlier to ensure plugins write access on init + GameMenu.Inject(CodeInjector); + DmaFix.Inject(CodeInjector); + OpcodeSystem.Inject(CodeInjector); + ScriptEngine.Inject(CodeInjector); - 
CodeInjector.ReplaceFunction(OnUpdateGameLogics, VersionManager.TranslateMemoryAddress(MA_CALL_UPDATE_GAME_LOGICS), &UpdateGameLogics); + CodeInjector.ReplaceFunction(OnCreateMainWnd, VersionManager.TranslateMemoryAddress(MA_CALL_CREATE_MAIN_WINDOW), &CreateMainWnd_Orig); - CodeInjector.ReplaceFunction(OnScmInit1, VersionManager.TranslateMemoryAddress(MA_CALL_INIT_SCM1), &ScmInit1_Orig); - CodeInjector.ReplaceFunction(OnScmInit2, VersionManager.TranslateMemoryAddress(MA_CALL_INIT_SCM2), &ScmInit2_Orig); - CodeInjector.ReplaceFunction(OnScmInit3, VersionManager.TranslateMemoryAddress(MA_CALL_INIT_SCM3), &ScmInit3_Orig); + CodeInjector.ReplaceFunction(OnUpdateGameLogics, VersionManager.TranslateMemoryAddress(MA_CALL_UPDATE_GAME_LOGICS), &UpdateGameLogics); - CodeInjector.ReplaceFunction(OnGameShutdown, VersionManager.TranslateMemoryAddress(MA_CALL_GAME_SHUTDOWN), &GameShutdown); + CodeInjector.ReplaceFunction(OnScmInit1, VersionManager.TranslateMemoryAddress(MA_CALL_INIT_SCM1), &ScmInit1_Orig); + CodeInjector.ReplaceFunction(OnScmInit2, VersionManager.TranslateMemoryAddress(MA_CALL_INIT_SCM2), &ScmInit2_Orig); + CodeInjector.ReplaceFunction(OnScmInit3, VersionManager.TranslateMemoryAddress(MA_CALL_INIT_SCM3), &ScmInit3_Orig); - CodeInjector.ReplaceFunction(OnGameRestart1, VersionManager.TranslateMemoryAddress(MA_CALL_GAME_RESTART_1), &GameRestart1); - CodeInjector.ReplaceFunction(OnGameRestart2, VersionManager.TranslateMemoryAddress(MA_CALL_GAME_RESTART_2), &GameRestart2); - CodeInjector.ReplaceFunction(OnGameRestart3, VersionManager.TranslateMemoryAddress(MA_CALL_GAME_RESTART_3), &GameRestart3); + CodeInjector.ReplaceFunction(OnGameShutdown, VersionManager.TranslateMemoryAddress(MA_CALL_GAME_SHUTDOWN), &GameShutdown); - CodeInjector.ReplaceFunction(OnDrawingFinished, 0x00734640); // nullsub_63 - originally something like renderDebugStuff? 
+ CodeInjector.ReplaceFunction(OnGameRestart1, VersionManager.TranslateMemoryAddress(MA_CALL_GAME_RESTART_1), &GameRestart1); + CodeInjector.ReplaceFunction(OnGameRestart2, VersionManager.TranslateMemoryAddress(MA_CALL_GAME_RESTART_2), &GameRestart2); + CodeInjector.ReplaceFunction(OnGameRestart3, VersionManager.TranslateMemoryAddress(MA_CALL_GAME_RESTART_3), &GameRestart3); - OpcodeSystem.Init(); - PluginSystem.LoadPlugins(); + OpcodeSystem.Init(); + PluginSystem.LoadPlugins(); + } + + // delayed until menu background drawing + if (stage == InitStage::OnDraw) + { + TRACE("CLEO initialization: Phase 2"); + + CodeInjector.ReplaceJump(OnDebugDisplayTextBuffer, VersionManager.TranslateMemoryAddress(MA_DEBUG_DISPLAY_TEXT_BUFFER), &DebugDisplayTextBuffer); + } + + m_initStage = stage; } void CCleoInstance::Stop() { - if (!m_bStarted) return; - m_bStarted = false; - - ScriptEngine.GameEnd(); + if (m_initStage >= InitStage::Initial) + { + ScriptEngine.GameEnd(); + PluginSystem.UnloadPlugins(); + } - PluginSystem.UnloadPlugins(); + m_initStage = InitStage::None; } void CCleoInstance::GameBegin() @@ -230,11 +252,6 @@ namespace CLEO GetInstance().RemoveCallback(id, func); } - void __cdecl CCleoInstance::OnDrawingFinished() - { - GetInstance().CallCallbacks(eCallbackId::DrawingFinished); // execute registered callbacks - } - DWORD WINAPI CLEO_GetInternalAudioStream(CLEO::CRunningScript* thread, DWORD stream) // arg CAudioStream * { return stream; // CAudioStream::streamInternal offset is 0 diff --git a/source/CleoBase.h b/source/CleoBase.h index 7c129e3e..4423c548 100644 --- a/source/CleoBase.h +++ b/source/CleoBase.h @@ -17,12 +17,16 @@ namespace CLEO { class CCleoInstance { - bool m_bStarted; - bool m_bGameInProgress; - std::map> m_callbacks; - public: - // order here defines init and deinit and order! + enum InitStage : size_t + { + None, + Initial, + OnDraw, + Done = OnDraw + }; + + // order here defines init and deinit order! 
CDmaFix DmaFix; CGameMenu GameMenu; CCodeInjector CodeInjector; @@ -35,16 +39,16 @@ namespace CLEO int saveSlot = -1; // -1 if not loaded from save - CCleoInstance(); + CCleoInstance() = default; virtual ~CCleoInstance(); - void Start(); + void Start(InitStage stage); void Stop(); void GameBegin(); void GameEnd(); - bool IsStarted() const { return m_bStarted; } + bool IsStarted() const { return m_initStage != InitStage::None; } void AddCallback(eCallbackId id, void* func); void RemoveCallback(eCallbackId id, void* func); @@ -75,15 +79,24 @@ namespace CLEO // call for Game::Shutdown void(__cdecl* GameShutdown)() = nullptr; - static void __cdecl OnGameShutdown(); + static void OnGameShutdown(); // calls for Game::ShutDownForRestart void(__cdecl* GameRestart1)() = nullptr; void(__cdecl* GameRestart2)() = nullptr; void(__cdecl* GameRestart3)() = nullptr; - static void __cdecl OnGameRestart1(); - static void __cdecl OnGameRestart2(); - static void __cdecl OnGameRestart3(); + static void OnGameRestart1(); + static void OnGameRestart2(); + static void OnGameRestart3(); + + // empty function called after everything else is drawn + memory_pointer DebugDisplayTextBuffer = nullptr; + static void OnDebugDisplayTextBuffer(); + + private: + InitStage m_initStage = InitStage::None; + bool m_bGameInProgress; + std::map> m_callbacks; }; CCleoInstance& GetInstance(); diff --git a/source/Mem.h b/source/Mem.h index 2f684d02..56696fbf 100644 --- a/source/Mem.h +++ b/source/Mem.h @@ -17,12 +17,29 @@ inline void MemCopy(U p, const T* v) { memcpy((void*)p, v, sizeof(T)); } template inline void MemCopy(U p, const T* v, int n) { memcpy((void*)p, v, n); } -// Write a jump to v to the address at p and copy the replaced call address to r +// Write a jump to v to the address at p and copy the replaced jump address to r template inline void MemJump(U p, const T v, T *r = nullptr) { + if (r != nullptr) + { + switch (MemRead(p)) + { + case OP_JMP: + *r = (T)(DWORD(p) + 5 + MemRead(p + 1)); + 
break; + + case OP_JMPSHORT: + *r = (T)(DWORD(p) + 2 + MemRead(p + 1)); + break; + + default: + *r = (T)nullptr; + break; + } + } + MemWrite(p++, OP_JMP); - if (r) *r = (T)(MemRead(p) + p + 4); MemWrite(p, ((DWORD)v - (DWORD)p) - 4); } @@ -30,8 +47,15 @@ inline void MemJump(U p, const T v, T *r = nullptr) template inline void MemCall(U p, const T v, T *r = nullptr) { + if (r != nullptr) + { + if (MemRead(p) == OP_CALL) + *r = (T)(DWORD(p) + 5 + MemRead(p + 1)); + else + *r = (T)nullptr; + } + MemWrite(p++, OP_CALL); - if (r) *r = (T)(MemRead(p) + p + 4); MemWrite(p, (DWORD)v - (DWORD)p - 4); } diff --git a/source/dllmain.cpp b/source/dllmain.cpp index abdbd40b..784ba0da 100644 --- a/source/dllmain.cpp +++ b/source/dllmain.cpp @@ -31,7 +31,7 @@ class Starter " 10) gta_sa.exe, decrypted 3.0 steam executable, 5 697 536 bytes." ); - CLEO::GetInstance().Start(); + CLEO::GetInstance().Start(CLEO::CCleoInstance::InitStage::Initial); } ~Starter() From 4f49346302f58390b3935d9a537c652d0001a072 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sat, 21 Sep 2024 18:23:26 +0200 Subject: [PATCH 200/216] Fixed Alpha.71 crashing (#207) --- source/CleoBase.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/CleoBase.cpp b/source/CleoBase.cpp index 31637e39..a532b2af 100644 --- a/source/CleoBase.cpp +++ b/source/CleoBase.cpp @@ -108,7 +108,7 @@ namespace CLEO void __declspec(naked) CCleoInstance::OnDebugDisplayTextBuffer() { - GetInstance().CallCallbacks(eCallbackId::DrawingFinished); // execute registered callbacks + CleoInstance.CallCallbacks(eCallbackId::DrawingFinished); // execute registered callbacks static DWORD oriFunc; oriFunc = (DWORD)(GetInstance().DebugDisplayTextBuffer); if (oriFunc != (DWORD)nullptr) From b91818c7b0ee4c55f76246c643ee615f98b4968d Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Mon, 23 Sep 2024 17:10:17 +0200 Subject: [PATCH 201/216] Fix CLEO not loading with UAL (#212) --- cleo_sdk/CLEO_Utils.h | 13 +++++++++++++ source/stdafx.h | 9 
++------- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/cleo_sdk/CLEO_Utils.h b/cleo_sdk/CLEO_Utils.h index ec373658..632ae0ba 100644 --- a/cleo_sdk/CLEO_Utils.h +++ b/cleo_sdk/CLEO_Utils.h @@ -162,6 +162,19 @@ namespace CLEO path.replace(0, base.length() + 1, ""); // remove path separator too if present } + // get path without last file/directory element + static const std::string_view FilepathGetParent(const std::string_view str) + { + auto separatorPos = str.find_last_of('\\'); + + if (separatorPos == std::string::npos) + { + return {}; + } + + return std::string_view(str.data(), separatorPos); + } + // this plugin's config file static std::string GetConfigFilename() { diff --git a/source/stdafx.h b/source/stdafx.h index 266e9974..ac92b8be 100644 --- a/source/stdafx.h +++ b/source/stdafx.h @@ -36,16 +36,11 @@ namespace FS = std::filesystem; static std::string GetGameDirectory() // already stored in Filepath_Game { - static const auto GTA_GetCWD = (char* (__cdecl*)(char*, int))0x00836E91; // SA 1.0 US ingame function - std::string path; - path.resize(MAX_PATH); - GTA_GetCWD(path.data(), path.size()); // assume work dir is game location when initialized - path.resize(strlen(path.data())); - + GetModuleFileNameA(NULL, path.data(), path.size()); // game exe absolute path + path.resize(CLEO::FilepathGetParent(path).length()); CLEO::FilepathNormalize(path); - return std::move(path); } From 7a262f1376f9084b69b57d50da2235ad536a69ff Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Mon, 23 Sep 2024 21:32:31 +0200 Subject: [PATCH 202/216] Removed duplicated stringPrintF util function. 
(#209) --- source/CCustomOpcodeSystem.cpp | 8 ++++---- source/CDebug.cpp | 17 ----------------- source/CDebug.h | 2 -- source/CScriptEngine.cpp | 4 ++-- 4 files changed, 6 insertions(+), 25 deletions(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index 2eee2e41..df91b1f0 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -387,13 +387,13 @@ namespace CLEO if (str != nullptr && (size_t)str <= CCustomOpcodeSystem::MinValidAddress) { - CCustomOpcodeSystem::lastErrorMsg = stringPrintf("Writing string from invalid '0x%X' pointer", target.data); + CCustomOpcodeSystem::lastErrorMsg = StringPrintf("Writing string from invalid '0x%X' pointer", target.data); return false; } if ((size_t)target.data <= CCustomOpcodeSystem::MinValidAddress) { - CCustomOpcodeSystem::lastErrorMsg = stringPrintf("Writing string into invalid '0x%X' pointer argument", target.data); + CCustomOpcodeSystem::lastErrorMsg = StringPrintf("Writing string into invalid '0x%X' pointer argument", target.data); return false; } @@ -428,7 +428,7 @@ namespace CLEO if (opcodeParams[0].dwParam <= CCustomOpcodeSystem::MinValidAddress) { - CCustomOpcodeSystem::lastErrorMsg = stringPrintf("Writing string into invalid '0x%X' pointer argument", opcodeParams[0].dwParam); + CCustomOpcodeSystem::lastErrorMsg = StringPrintf("Writing string into invalid '0x%X' pointer argument", opcodeParams[0].dwParam); return result; // error } @@ -465,7 +465,7 @@ namespace CLEO } } - CCustomOpcodeSystem::lastErrorMsg = stringPrintf("Writing string, got argument %s", ToKindStr(paramType)); + CCustomOpcodeSystem::lastErrorMsg = StringPrintf("Writing string, got argument %s", ToKindStr(paramType)); CLEO_SkipOpcodeParams(thread, 1); // skip unhandled param return result; // error } diff --git a/source/CDebug.cpp b/source/CDebug.cpp index 91c3a626..7d253e51 100644 --- a/source/CDebug.cpp +++ b/source/CDebug.cpp @@ -6,23 +6,6 @@ CDebug Debug; using namespace CLEO; -std::string 
stringPrintf(const char* format, ...) -{ - va_list args; - - va_start(args, format); - auto len = std::vsnprintf(nullptr, 0, format, args) + 1; - va_end(args); - - std::string result(len, '\0'); - - va_start(args, format); - std::vsnprintf(result.data(), result.length(), format, args); - va_end(args); - - return result; -} - void CDebug::Trace(CLEO::eLogLevel level, const char* msg) { std::lock_guard guard(mutex); diff --git a/source/CDebug.h b/source/CDebug.h index c6223d54..74213c5d 100644 --- a/source/CDebug.h +++ b/source/CDebug.h @@ -1,8 +1,6 @@ #pragma once #include -std::string stringPrintf(const char* format, ...); - namespace CLEO { class CRunningScript; diff --git a/source/CScriptEngine.cpp b/source/CScriptEngine.cpp index ee62f041..7e3d4258 100644 --- a/source/CScriptEngine.cpp +++ b/source/CScriptEngine.cpp @@ -932,7 +932,7 @@ namespace CLEO } else // mission pack { - MainScriptFileDir = Filepath_User + stringPrintf("\\MPACK\\MPACK%d", CGame::bMissionPackGame); + MainScriptFileDir = Filepath_User + StringPrintf("\\MPACK\\MPACK%d", CGame::bMissionPackGame); MainScriptFileName = "scr.scm"; } @@ -1066,7 +1066,7 @@ namespace CLEO if(saveSlot == -1) return; - auto saveFile = FS::path(Filepath_Cleo).append(stringPrintf("cleo_saves\\cs%d.sav", saveSlot)).string(); + auto saveFile = FS::path(Filepath_Cleo).append(StringPrintf("cleo_saves\\cs%d.sav", saveSlot)).string(); safe_info = nullptr; stopped_info = nullptr; From 468e2d7e02abee30218e40f643977125d9e17f6b Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Tue, 24 Sep 2024 05:24:12 +0200 Subject: [PATCH 203/216] Fix BASS_StreamFree called twice for 3d audio streams. (#210) * Fix BASS_StreamFree called twice for 3d audio streams. * Added BASS library version check. 
--- cleo_plugins/Audio/C3DAudioStream.cpp | 5 ----- cleo_plugins/Audio/C3DAudioStream.h | 1 - cleo_plugins/Audio/CSoundSystem.cpp | 11 ++++++++++- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/cleo_plugins/Audio/C3DAudioStream.cpp b/cleo_plugins/Audio/C3DAudioStream.cpp index 4b774808..88861175 100644 --- a/cleo_plugins/Audio/C3DAudioStream.cpp +++ b/cleo_plugins/Audio/C3DAudioStream.cpp @@ -27,11 +27,6 @@ C3DAudioStream::C3DAudioStream(const char* filepath) : CAudioStream() ok = true; } -C3DAudioStream::~C3DAudioStream() -{ - if (streamInternal) BASS_StreamFree(streamInternal); -} - void C3DAudioStream::Set3dPosition(const CVector& pos) { link = nullptr; diff --git a/cleo_plugins/Audio/C3DAudioStream.h b/cleo_plugins/Audio/C3DAudioStream.h index fc319838..cbef37e5 100644 --- a/cleo_plugins/Audio/C3DAudioStream.h +++ b/cleo_plugins/Audio/C3DAudioStream.h @@ -7,7 +7,6 @@ namespace CLEO { public: C3DAudioStream(const char* filepath); - virtual ~C3DAudioStream(); // overloaded actions virtual void Set3dPosition(const CVector& pos); diff --git a/cleo_plugins/Audio/CSoundSystem.cpp b/cleo_plugins/Audio/CSoundSystem.cpp index 719ed529..906babf6 100644 --- a/cleo_plugins/Audio/CSoundSystem.cpp +++ b/cleo_plugins/Audio/CSoundSystem.cpp @@ -20,7 +20,6 @@ namespace CLEO void EnumerateBassDevices(int& total, int& enabled, int& default_device) { - TRACE(""); // separator TRACE("Listing audio devices:"); BASS_DEVICEINFO info; @@ -63,6 +62,16 @@ namespace CLEO { if (initialized) return true; // already done + TRACE(""); // separator + TRACE("Initializing SoundSystem..."); + + auto ver = HIWORD(BASS_GetVersion()); + TRACE("BASS library version is %d (required %d or newer)", ver, BASSVERSION); + if (ver < BASSVERSION) + { + SHOW_ERROR("Invalid BASS library version! 
Expected at least %d, found %d.", BASSVERSION, ver); + } + auto config = GetConfigFilename(); LegacyModeDefaultStreamType = (eStreamType)GetPrivateProfileInt("General", "LegacyModeDefaultStreamType", 0, config.c_str()); allowNetworkSources = GetPrivateProfileInt("General", "AllowNetworkSources", 1, config.c_str()) != 0; From cc82371cc4d1474a6c39512103d9de628720d397 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Wed, 25 Sep 2024 17:32:21 +0200 Subject: [PATCH 204/216] Fix 0AB2 return params in legacy scripts. (#213) --- source/CCustomOpcodeSystem.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/CCustomOpcodeSystem.cpp b/source/CCustomOpcodeSystem.cpp index df91b1f0..58ecbe4e 100644 --- a/source/CCustomOpcodeSystem.cpp +++ b/source/CCustomOpcodeSystem.cpp @@ -1120,7 +1120,7 @@ namespace CLEO returnParamCount = declaredParamCount; } - return GetInstance().OpcodeSystem.CleoReturnGeneric(0x0AB2, thread, !IsLegacyScript(thread), returnParamCount); + return GetInstance().OpcodeSystem.CleoReturnGeneric(0x0AB2, thread, true, returnParamCount, !IsLegacyScript(thread)); } //0AB3=2,set_cleo_shared_var %1d% = %2d% From 08188a5c09e0911fbb67a2a81efc01f3336e758e Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Thu, 26 Sep 2024 12:58:14 +0200 Subject: [PATCH 205/216] Allow relative paths in FilepathIsSafe (#214) --- cleo_sdk/CLEO_Utils.h | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/cleo_sdk/CLEO_Utils.h b/cleo_sdk/CLEO_Utils.h index 632ae0ba..f12e3812 100644 --- a/cleo_sdk/CLEO_Utils.h +++ b/cleo_sdk/CLEO_Utils.h @@ -124,7 +124,7 @@ namespace CLEO return std::move(info); } - // Normalize filepath, collapse all parent directory references. Input should be absolute path without expandable %variables% + // Normalize filepath, collapse all parent directory references, trim path separators at front and back. 
Input should be path without expandable %variables% static void FilepathNormalize(std::string& path, bool normalizeCase = true) { if (path.empty()) return; @@ -139,17 +139,27 @@ namespace CLEO size_t refPos = path.find(ParentRef); while (refPos != std::string::npos && refPos > 0) { - size_t parentPos = path.rfind('\\', refPos - 1); // find start of parent name + size_t parentPos = path.rfind('\\', refPos - 1); // find start of parent dir name - if (parentPos == std::string::npos) - return; // parent must be root of the path then. We want to keep absolute path, let it be as is (even if "C:\..\" makes no sense) + if (parentPos == std::string::npos) // no more separators, so parent has to be root dir + { + parentPos = 0; + refPos += 1; // remove following separator too + } + + if (_strnicmp(path.c_str() + parentPos, "..\\", 3) == 0) + { + break; // parent directory is reference to parent directory too + } - path.replace(parentPos, (refPos - parentPos) + ParentRefLen - 1, ""); // remove parent and parent reference + path.replace(parentPos, (refPos - parentPos) + ParentRefLen - 1, ""); // remove parent dir along with following \\.. 
refPos = path.find(ParentRef); // find next } - while(path.back() == '\\') path.pop_back(); // remove trailing path separator(s) + // trim separators + while (path.front() == '\\') path.erase(0, 1); + while (path.back() == '\\') path.pop_back(); } // strip parent prefix from filepath if present @@ -197,16 +207,20 @@ namespace CLEO if (!std::filesystem::path(path).is_absolute()) { absolute = CLEO_GetScriptWorkDir(thread); - absolute += '\\'; - absolute += path; - FilepathNormalize(absolute, false); - path = absolute.c_str(); + if (!absolute.empty()) + { + absolute += '\\'; + absolute += path; + FilepathNormalize(absolute, false); + path = absolute.c_str(); + } } if (!StringStartsWith(path, CLEO_GetGameDirectory(), false) && !StringStartsWith(path, CLEO_GetUserDirectory(), false)) { - return false; + if (StringStartsWith(path, "..")) // relative path trying to escape game's directory + return false; } return true; From b1c151b19cc9fab0c7b5dd6c974d74484063013b Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sat, 28 Sep 2024 16:46:19 +0200 Subject: [PATCH 206/216] Tests runner script updated to current SBL. (#215) * Tests runner script updated to current SBL. * fixup! Tests runner script updated to current SBL. 
--- tests/.cleo_tests_runner.txt | 93 ++++++++++++++---------------------- 1 file changed, 36 insertions(+), 57 deletions(-) diff --git a/tests/.cleo_tests_runner.txt b/tests/.cleo_tests_runner.txt index 9259faaf..d0657f08 100644 --- a/tests/.cleo_tests_runner.txt +++ b/tests/.cleo_tests_runner.txt @@ -1,96 +1,75 @@ +// Sanny Builder 4 +// mode: GTA SA (v1.0 - SBL) + {$CLEO .cs} -{$USE debug} -{$USE memory} -{$USE file} script_name 'CleoTest' debug_on -print_big_formatted "CLEO TESTING" {time} 5000 {style} TextStyle.MiddleSmaller +print_big_string {text} "CLEO TESTING" {time} 4000 {style} TextStyle.MiddleSmaller -wait 5000 // wait for game to fade in +wait 4000 // wait for game to fade in clear_prints -cleo_call @RUN_TESTS_DIR {argCount} 2 {args} "cleo:" "cleo_tests" +RUN_TESTS_DIR("cleo:", "cleo_tests") -print_big_formatted "DONE" {time} 5000 {style} TextStyle.MiddleSmaller +print_big_string {text} "DONE" {time} 5000 {style} TextStyle.MiddleSmaller terminate_this_custom_script - -// arg 0 - base directory path -// arg 1 - directory name -:RUN_TESTS_DIR - trace "~w~Testing module '%s'" 1@ +function RUN_TESTS_DIR(basePath :string, directory :string) + trace "~w~Testing module '%s'" directory // process all test files - 5@ = allocate_memory 260 - string_format {buffer} 5@ {format} "%s\\%s\\*.s" 0@ 1@ + int str = allocate_memory 260 + string_format str = "%s\\%s\\*.s" basePath directory - 6@ = 0 // search handle - 7@ = allocate_memory 64 + int searchHandle = 0 + int found = allocate_memory 64 if - find_first_file 5@ {handle} 6@ {fileName} 7@ + find_first_file str {handle} searchHandle {fileName} found then - while true - string_format {buffer} 5@ {format} "%s\\%s\\%s" 0@ 1@ 7@ + repeat + string_format str = "%s\\%s\\%s" basePath directory found if - does_file_exist 5@ // files only + does_file_exist str // files only then - stream_custom_script 5@ - get_script_struct_just_created 11@ + stream_custom_script str + int script = get_script_struct_just_created - while 
is_script_running 11@ + while is_script_running script wait 0 end end + until not find_next_file searchHandle {fileName} found - write_memory 7@ {size} 4 {value} 0 {vp} false - if - not find_next_file 6@ {fileName} 7@ - then - break - end - end - - find_close 6@ + find_close searchHandle else trace "~r~No tests found!" end // process all sub directories - string_format {buffer} 5@ {format} "%s\\%s\\*" 0@ 1@ - - 6@ = 0 // search handle - write_memory 7@ {size} 4 {value} 0 {vp} false + string_format str = "%s\\%s\\*" basePath directory if - find_first_file 5@ {handle} 6@ {fileName} 7@ + find_first_file str {handle} searchHandle {fileName} found then - while true - string_format {buffer} 5@ {format} "%s\\%s\\%s" 0@ 1@ 7@ - 8@ = read_memory 7@ {size} 4 {vp} false + repeat + string_format str = "%s\\%s\\%s" basePath directory found if and - 8@ <> 0x2E // "." - 8@ <> 0x2E2E // ".." - does_directory_exist 5@ // directories only - then - string_format {buffer} 5@ {format} "%s\\%s" 0@ 1@ - cleo_call @RUN_TESTS_DIR {argCount} 2 {args} 5@ 7@ - end - - write_memory 7@ {size} 4 {value} 0 {vp} false - if - not find_next_file 6@ {fileName} 7@ + not is_text_prefix {text} found {prefix} "." {ignoreCase} true + not is_text_prefix {text} found {prefix} ".." {ignoreCase} true + does_directory_exist str // directories only then - break + string_format str = "%s\\%s" basePath directory + RUN_TESTS_DIR(str, found) end - end + until not find_next_file searchHandle {fileName} found - find_close 6@ + find_close searchHandle end - free_memory 5@ - free_memory 7@ -cleo_return + free_memory str + free_memory found +end From c4257f4c92d9871e19fb59f6a31b5078178a36fc Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sun, 29 Sep 2024 07:26:04 +0200 Subject: [PATCH 207/216] Added unit tests for file operation opcodes. (#216) * Added unit test for file operation opcodes. * fixup! Added unit test for file operation opcodes. * fixup! Added unit test for file operation opcodes. * fixup! 
Added unit test for file operation opcodes. --- .../cleo_tests/FilesystemOperations/0A9A.txt | 2 +- .../cleo_tests/FilesystemOperations/0A9B.txt | 4 +- .../cleo_tests/FilesystemOperations/0AAB.txt | 25 +++++++ .../cleo_tests/FilesystemOperations/0AE4.txt | 25 +++++++ .../cleo_tests/FilesystemOperations/0AE5.txt | 30 ++++++++ .../cleo_tests/FilesystemOperations/0B00.txt | 34 +++++++++ .../cleo_tests/FilesystemOperations/0B01.txt | 64 ++++++++++++++++ .../cleo_tests/FilesystemOperations/0B02.txt | 50 +++++++++++++ .../cleo_tests/FilesystemOperations/0B03.txt | 66 +++++++++++++++++ .../cleo_tests/FilesystemOperations/0B04.txt | 50 +++++++++++++ .../cleo_tests/FilesystemOperations/0B05.txt | 74 +++++++++++++++++++ 11 files changed, 421 insertions(+), 3 deletions(-) create mode 100644 tests/cleo_tests/FilesystemOperations/0AAB.txt create mode 100644 tests/cleo_tests/FilesystemOperations/0AE4.txt create mode 100644 tests/cleo_tests/FilesystemOperations/0AE5.txt create mode 100644 tests/cleo_tests/FilesystemOperations/0B00.txt create mode 100644 tests/cleo_tests/FilesystemOperations/0B01.txt create mode 100644 tests/cleo_tests/FilesystemOperations/0B02.txt create mode 100644 tests/cleo_tests/FilesystemOperations/0B03.txt create mode 100644 tests/cleo_tests/FilesystemOperations/0B04.txt create mode 100644 tests/cleo_tests/FilesystemOperations/0B05.txt diff --git a/tests/cleo_tests/FilesystemOperations/0A9A.txt b/tests/cleo_tests/FilesystemOperations/0A9A.txt index 8416d34b..a911c7ec 100644 --- a/tests/cleo_tests/FilesystemOperations/0A9A.txt +++ b/tests/cleo_tests/FilesystemOperations/0A9A.txt @@ -19,7 +19,7 @@ function tests function test2 if - 0@ = open_file "cleo\\.cleo.log" {mode} "r" // tested opcode + 0@ = open_file "cleo\\.cleo_config.ini" {mode} "r" // tested opcode then assert(true) close_file 0@ diff --git a/tests/cleo_tests/FilesystemOperations/0A9B.txt b/tests/cleo_tests/FilesystemOperations/0A9B.txt index 4a22737e..4d8a78b9 100644 --- 
a/tests/cleo_tests/FilesystemOperations/0A9B.txt +++ b/tests/cleo_tests/FilesystemOperations/0A9B.txt @@ -23,7 +23,7 @@ trace "0A9B (close_file)" wait 0 // open file if - 0@ = open_file "cleo\.cleo.log" {mode} "r+" + 0@ = open_file "cleo\.cleo_config.ini" {mode} "r+" then trace "~g~~h~~h~0A9B (close_file), #0 PASSED" else @@ -40,7 +40,7 @@ trace "~g~~h~~h~0A9B (close_file), #1 PASSED" wait 0 // open file again if - 0@ = open_file "cleo\.cleo.log" {mode} "r+" + 0@ = open_file "cleo\.cleo_config.ini" {mode} "r+" then trace "~g~~h~~h~0A9B (close_file), #2 PASSED" else diff --git a/tests/cleo_tests/FilesystemOperations/0AAB.txt b/tests/cleo_tests/FilesystemOperations/0AAB.txt new file mode 100644 index 00000000..5d4c16be --- /dev/null +++ b/tests/cleo_tests/FilesystemOperations/0AAB.txt @@ -0,0 +1,25 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name "0AAB" +test("0AAB (does_file_exist)", tests) +terminate_this_custom_script + + +function tests + + it("should fail on a non-existing file", test1) + it("should success on existing file", test2) + return + + function test1 + does_file_exist {path} "cleo\\not_a_file.txt" // tested opcode + assert_result_false() + end + + function test2 + does_file_exist {path} "cleo\\.cleo_config.ini" // tested opcode + assert_result_true() + end + +end diff --git a/tests/cleo_tests/FilesystemOperations/0AE4.txt b/tests/cleo_tests/FilesystemOperations/0AE4.txt new file mode 100644 index 00000000..55ff5fd9 --- /dev/null +++ b/tests/cleo_tests/FilesystemOperations/0AE4.txt @@ -0,0 +1,25 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name "0AE4" +test("0AE4 (does_directory_exist)", tests) +terminate_this_custom_script + + +function tests + + it("should fail on a non-existing directory", test1) + it("should success on existing directory", test2) + return + + function test1 + does_directory_exist {path} "cleo\\not_a_directory" // tested opcode + assert_result_false() + end + + function test2 + does_directory_exist 
{path} "cleo\\cleo_tests" // tested opcode + assert_result_true() + end + +end diff --git a/tests/cleo_tests/FilesystemOperations/0AE5.txt b/tests/cleo_tests/FilesystemOperations/0AE5.txt new file mode 100644 index 00000000..ee35ebb0 --- /dev/null +++ b/tests/cleo_tests/FilesystemOperations/0AE5.txt @@ -0,0 +1,30 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name "0AE5" +test("0AE5 (create_directory)", tests) +terminate_this_custom_script + + +const Test_Path = "cleo\\cleo_test_directory" + +function tests + + it("should create directory", test1) + return + + function test1 + does_directory_exist {path} Test_Path + assert_result_false() + + create_directory {path} Test_Path // tested opcode + assert_result_true() + + does_directory_exist {path} Test_Path + assert_result_true() + + // cleanup + delete_directory {path} Test_Path {recursive} false + end + +end diff --git a/tests/cleo_tests/FilesystemOperations/0B00.txt b/tests/cleo_tests/FilesystemOperations/0B00.txt new file mode 100644 index 00000000..2e9ba50d --- /dev/null +++ b/tests/cleo_tests/FilesystemOperations/0B00.txt @@ -0,0 +1,34 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name "0B00" +test("0B00 (delete_file)", tests) +terminate_this_custom_script + + +const Test_Path = "cleo\\cleo_test_file.ini" + +function tests + + it("should fail on a non-existing file", test1) + it("should delete existing file", test2) + return + + function test1 + delete_file {path} "cleo\\not_a_file.ini" // tested opcode + assert_result_false() + end + + function test2 + write_int_to_ini_file {value} 42 {path} Test_Path {section} "test" {key} "test" + assert_result_true() + does_file_exist {path} Test_Path + assert_result_true() + + delete_file {path} Test_Path // tested opcode + assert_result_true() + does_file_exist {path} Test_Path + assert_result_false() + end + +end diff --git a/tests/cleo_tests/FilesystemOperations/0B01.txt b/tests/cleo_tests/FilesystemOperations/0B01.txt new file mode 100644 
index 00000000..56eea860 --- /dev/null +++ b/tests/cleo_tests/FilesystemOperations/0B01.txt @@ -0,0 +1,64 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name "0B01" +test("0B01 (delete_directory)", tests) +terminate_this_custom_script + + +const Test_Path = "cleo\\cleo_test_directory" + +function tests + + it("should fail on a non-existing directory", test1) + it("should delete empty directory", test2) + it("should delete directory with contents", test3) + return + + function test1 + delete_directory {dirPath} Test_Path {recursive} false // tested opcode + assert_result_false() + end + + function test2 + create_directory {path} Test_Path + assert_result_true() + does_directory_exist {dirPath} Test_Path + assert_result_true() + + delete_directory {dirPath} Test_Path {recursive} false // tested opcode + assert_result_true() + does_directory_exist {dirPath} Test_Path + assert_result_false() + end + + function test3 + create_directory {path} Test_Path + assert_result_true() + does_directory_exist {dirPath} Test_Path + assert_result_true() + + set_current_directory {path} Test_Path + create_directory {path} "Test_Sub_Dir" + write_int_to_ini_file {value} 42 {path} "Test_File.ini" {section} "test" {key} "test" + set_current_directory {path} 0 + + // check if file was actually created in desired location + int str = allocate_memory {size} 260 + string_format str = "%s\\Test_File.ini" Test_Path + int value = read_int_from_ini_file {path} str {section} "test" {key} "test" + assert_eq(value, 42) + free_memory str + + delete_directory {dirPath} Test_Path {recursive} false // tested opcode + assert_result_false() + does_directory_exist {dirPath} Test_Path + assert_result_true() + + delete_directory {dirPath} Test_Path {recursive} true // tested opcode + assert_result_true() + does_directory_exist {dirPath} Test_Path + assert_result_false() + end + +end diff --git a/tests/cleo_tests/FilesystemOperations/0B02.txt b/tests/cleo_tests/FilesystemOperations/0B02.txt new 
file mode 100644 index 00000000..94b5c6cd --- /dev/null +++ b/tests/cleo_tests/FilesystemOperations/0B02.txt @@ -0,0 +1,50 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name "0B02" +test("0B02 (move_file)", tests) +terminate_this_custom_script + + +const Test_Path_Src = "cleo\\cleo_test_file.ini" +const Test_Path_Dst = "_test_file_B.ini" + +function tests + + it("should fail on a non-existing file", test1) + it("should move file", test2) + return + + function test1 + does_file_exist {dirPath} Test_Path_Src + assert_result_false() + + move_file {path} Test_Path_Src {newPath} Test_Path_Dst // tested opcode + assert_result_false() + end + + function test2 + // setup + write_int_to_ini_file {value} 42 {path} Test_Path_Src {section} "test" {key} "test" + assert_result_true() + does_file_exist {dirPath} Test_Path_Src + assert_result_true() + does_file_exist {dirPath} Test_Path_Dst + assert_result_false() + + // act + move_file {path} Test_Path_Src {newPath} Test_Path_Dst // tested opcode + assert_result_true() + does_file_exist {dirPath} Test_Path_Src + assert_result_false() + does_file_exist {dirPath} Test_Path_Dst + assert_result_true() + + int value = read_int_from_ini_file {path} Test_Path_Dst {section} "test" {key} "test" + assert_eq(value, 42) + + // cleanup + delete_file {fileName} Test_Path_Dst + end + +end diff --git a/tests/cleo_tests/FilesystemOperations/0B03.txt b/tests/cleo_tests/FilesystemOperations/0B03.txt new file mode 100644 index 00000000..be094d3b --- /dev/null +++ b/tests/cleo_tests/FilesystemOperations/0B03.txt @@ -0,0 +1,66 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name "0B03" +test("0B03 (move_directory)", tests) +terminate_this_custom_script + + +const Test_Path_Src = "cleo\\cleo_test_dir" +const Test_Path_Dst = "test_directory" + +function tests + + it("should fail on a non-existing directory", test1) + it("should move directory", test2) + return + + function test1 + does_directory_exist {dirPath} Test_Path_Src 
+ assert_result_false() + + move_directory {path} Test_Path_Src {newPath} Test_Path_Dst // tested opcode + assert_result_false() + end + + function test2 + // setup + create_directory {path} Test_Path_Src + set_current_directory {path} Test_Path_Src + create_directory {path} "Test_Sub_Dir" + write_int_to_ini_file {value} 42 {path} "Test_File.ini" {section} "test" {key} "test" + set_current_directory {path} 0 + assert_result_true() + does_directory_exist {dirPath} Test_Path_Src + assert_result_true() + does_directory_exist {dirPath} Test_Path_Dst + assert_result_false() + + // check if file was actually created in desired location + int str = allocate_memory {size} 260 + string_format str = "%s\\Test_File.ini" Test_Path_Src + int value = read_int_from_ini_file {path} str {section} "test" {key} "test" + assert_eq(value, 42) + free_memory str + + // act + move_directory {path} Test_Path_Src {newPath} Test_Path_Dst // tested opcode + assert_result_true() + does_directory_exist {dirPath} Test_Path_Src + assert_result_false() + does_directory_exist {dirPath} Test_Path_Dst + assert_result_true() + + // check contents + set_current_directory {path} Test_Path_Dst + does_directory_exist {path} "Test_Sub_Dir" + assert_result_true() + value = read_int_from_ini_file {path} "Test_File.ini" {section} "test" {key} "test" + assert_eq(value, 42) + set_current_directory {path} 0 + + // cleanup + delete_directory {dirPath} Test_Path_Dst {recursive} true + end + +end diff --git a/tests/cleo_tests/FilesystemOperations/0B04.txt b/tests/cleo_tests/FilesystemOperations/0B04.txt new file mode 100644 index 00000000..19679aee --- /dev/null +++ b/tests/cleo_tests/FilesystemOperations/0B04.txt @@ -0,0 +1,50 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name "0B04" +test("0B04 (copy_file)", tests) +terminate_this_custom_script + + +const Test_Path_Src = "cleo\\cleo_test_file.ini" +const Test_Path_Dst = "_test_file_B.ini" + +function tests + + it("should fail on a non-existing 
file", test1) + it("should copy file", test2) + return + + function test1 + does_file_exist {dirPath} Test_Path_Src + assert_result_false() + + copy_file {path} Test_Path_Src {newPath} Test_Path_Dst // tested opcode + assert_result_false() + end + + function test2 + // setup + write_int_to_ini_file {value} 42 {path} Test_Path_Src {section} "test" {key} "test" + assert_result_true() + does_file_exist {dirPath} Test_Path_Src + assert_result_true() + does_file_exist {dirPath} Test_Path_Dst + assert_result_false() + + // act + copy_file {path} Test_Path_Src {newPath} Test_Path_Dst // tested opcode + assert_result_true() + + int value = read_int_from_ini_file {path} Test_Path_Src {section} "test" {key} "test" + assert_eq(value, 42) + + value = read_int_from_ini_file {path} Test_Path_Dst {section} "test" {key} "test" + assert_eq(value, 42) + + // cleanup + delete_file {fileName} Test_Path_Src + delete_file {fileName} Test_Path_Dst + end + +end diff --git a/tests/cleo_tests/FilesystemOperations/0B05.txt b/tests/cleo_tests/FilesystemOperations/0B05.txt new file mode 100644 index 00000000..d157602f --- /dev/null +++ b/tests/cleo_tests/FilesystemOperations/0B05.txt @@ -0,0 +1,74 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name "0B05" +test("0B05 (copy_directory)", tests) +terminate_this_custom_script + + +const Test_Path_Src = "cleo\\cleo_test_dir" +const Test_Path_Dst = "test_directory" + +function tests + + it("should fail on a non-existing directory", test1) + it("should move directory", test2) + return + + function test1 + does_directory_exist {dirPath} Test_Path_Src + assert_result_false() + + copy_directory {path} Test_Path_Src {newPath} Test_Path_Dst // tested opcode + assert_result_false() + end + + function test2 + // setup + create_directory {path} Test_Path_Src + set_current_directory {path} Test_Path_Src + create_directory {path} "Test_Sub_Dir" + write_int_to_ini_file {value} 42 {path} "Test_File.ini" {section} "test" {key} "test" + 
set_current_directory {path} 0 + assert_result_true() + does_directory_exist {dirPath} Test_Path_Src + assert_result_true() + does_directory_exist {dirPath} Test_Path_Dst + assert_result_false() + + // check if file was actually created in desired location + int str = allocate_memory {size} 260 + string_format str = "%s\\Test_File.ini" Test_Path_Src + int value = read_int_from_ini_file {path} str {section} "test" {key} "test" + assert_eq(value, 42) + free_memory str + + // act + copy_directory {path} Test_Path_Src {newPath} Test_Path_Dst // tested opcode + assert_result_true() + does_directory_exist {dirPath} Test_Path_Src + assert_result_true() + does_directory_exist {dirPath} Test_Path_Dst + assert_result_true() + + // check contents + set_current_directory {path} Test_Path_Src + does_directory_exist {path} "Test_Sub_Dir" + assert_result_true() + value = read_int_from_ini_file {path} "Test_File.ini" {section} "test" {key} "test" + assert_eq(value, 42) + set_current_directory {path} 0 + + set_current_directory {path} Test_Path_Dst + does_directory_exist {path} "Test_Sub_Dir" + assert_result_true() + value = read_int_from_ini_file {path} "Test_File.ini" {section} "test" {key} "test" + assert_eq(value, 42) + set_current_directory {path} 0 + + // cleanup + delete_directory {dirPath} Test_Path_Src {recursive} true + delete_directory {dirPath} Test_Path_Dst {recursive} true + end + +end From b355f9aeae61ac6c7c787dab49005a08da1d1235 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sun, 29 Sep 2024 17:48:23 +0200 Subject: [PATCH 208/216] Clear screen log on new game start (#217) --- cleo_plugins/DebugUtils/DebugUtils.cpp | 7 +++++++ cleo_plugins/DebugUtils/ScreenLog.cpp | 5 +++++ cleo_plugins/DebugUtils/ScreenLog.h | 1 + 3 files changed, 13 insertions(+) diff --git a/cleo_plugins/DebugUtils/DebugUtils.cpp b/cleo_plugins/DebugUtils/DebugUtils.cpp index 13ef5f08..fffe4425 100644 --- a/cleo_plugins/DebugUtils/DebugUtils.cpp +++ b/cleo_plugins/DebugUtils/DebugUtils.cpp @@ -60,6 
+60,7 @@ class DebugUtils } // register event callbacks + CLEO_RegisterCallback(eCallbackId::GameBegin, OnGameBegin); CLEO_RegisterCallback(eCallbackId::Log, OnLog); CLEO_RegisterCallback(eCallbackId::DrawingFinished, OnDrawingFinished); CLEO_RegisterCallback(eCallbackId::ScriptProcess, OnScriptProcess); @@ -68,6 +69,7 @@ class DebugUtils ~DebugUtils() { + CLEO_UnregisterCallback(eCallbackId::GameBegin, OnGameBegin); CLEO_UnregisterCallback(eCallbackId::Log, OnLog); CLEO_UnregisterCallback(eCallbackId::DrawingFinished, OnDrawingFinished); CLEO_UnregisterCallback(eCallbackId::ScriptProcess, OnScriptProcess); @@ -76,6 +78,11 @@ class DebugUtils // ---------------------------------------------- event callbacks ------------------------------------------------- + static void WINAPI OnGameBegin(DWORD saveSlot) + { + screenLog.Clear(); + } + static void WINAPI OnScriptsFinalize() { pausedScripts.clear(); diff --git a/cleo_plugins/DebugUtils/ScreenLog.cpp b/cleo_plugins/DebugUtils/ScreenLog.cpp index 159fab9b..2ae063eb 100644 --- a/cleo_plugins/DebugUtils/ScreenLog.cpp +++ b/cleo_plugins/DebugUtils/ScreenLog.cpp @@ -73,6 +73,11 @@ void ScreenLog::Add(eLogLevel level, const char* msg) } } +void ScreenLog::Clear() +{ + entries.clear(); +} + void ScreenLog::Draw() { // scroll animation diff --git a/cleo_plugins/DebugUtils/ScreenLog.h b/cleo_plugins/DebugUtils/ScreenLog.h index d4c193bc..f4a0d5c6 100644 --- a/cleo_plugins/DebugUtils/ScreenLog.h +++ b/cleo_plugins/DebugUtils/ScreenLog.h @@ -16,6 +16,7 @@ class ScreenLog void Init(); void Add(eLogLevel level, const char* msg); + void Clear(); void Draw(); void DrawLine(const char* msg, size_t row = 0); From 43312957f511042cfa5f33d53e81aad0727de890 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Mon, 30 Sep 2024 21:14:46 +0200 Subject: [PATCH 209/216] More unit tests (#221) * Added cleanup steps to file operations unit tests * Added IniFiles tests. 
--- .../cleo_tests/FilesystemOperations/0AE5.txt | 9 ++- .../cleo_tests/FilesystemOperations/0B00.txt | 8 +- .../cleo_tests/FilesystemOperations/0B01.txt | 7 ++ .../cleo_tests/FilesystemOperations/0B02.txt | 10 ++- .../cleo_tests/FilesystemOperations/0B03.txt | 12 ++- .../cleo_tests/FilesystemOperations/0B04.txt | 17 ++-- .../cleo_tests/FilesystemOperations/0B05.txt | 13 +-- tests/cleo_tests/IniFiles/0AF0.txt | 59 ++++++++++++++ tests/cleo_tests/IniFiles/0AF1.txt | 81 +++++++++++++++++++ tests/cleo_tests/IniFiles/0AF2.txt | 59 ++++++++++++++ tests/cleo_tests/IniFiles/0AF3.txt | 81 +++++++++++++++++++ tests/cleo_tests/IniFiles/0AF4.txt | 53 ++++++++++++ tests/cleo_tests/IniFiles/0AF5.txt | 81 +++++++++++++++++++ 13 files changed, 467 insertions(+), 23 deletions(-) create mode 100644 tests/cleo_tests/IniFiles/0AF0.txt create mode 100644 tests/cleo_tests/IniFiles/0AF1.txt create mode 100644 tests/cleo_tests/IniFiles/0AF2.txt create mode 100644 tests/cleo_tests/IniFiles/0AF3.txt create mode 100644 tests/cleo_tests/IniFiles/0AF4.txt create mode 100644 tests/cleo_tests/IniFiles/0AF5.txt diff --git a/tests/cleo_tests/FilesystemOperations/0AE5.txt b/tests/cleo_tests/FilesystemOperations/0AE5.txt index ee35ebb0..37e74600 100644 --- a/tests/cleo_tests/FilesystemOperations/0AE5.txt +++ b/tests/cleo_tests/FilesystemOperations/0AE5.txt @@ -9,10 +9,16 @@ terminate_this_custom_script const Test_Path = "cleo\\cleo_test_directory" function tests + before_each(@cleanup) + after_each(@cleanup) it("should create directory", test1) return + :cleanup + delete_directory {path} Test_Path {recursive} true + return + function test1 does_directory_exist {path} Test_Path assert_result_false() @@ -22,9 +28,6 @@ function tests does_directory_exist {path} Test_Path assert_result_true() - - // cleanup - delete_directory {path} Test_Path {recursive} false end end diff --git a/tests/cleo_tests/FilesystemOperations/0B00.txt b/tests/cleo_tests/FilesystemOperations/0B00.txt index 2e9ba50d..29413007 
100644 --- a/tests/cleo_tests/FilesystemOperations/0B00.txt +++ b/tests/cleo_tests/FilesystemOperations/0B00.txt @@ -9,11 +9,17 @@ terminate_this_custom_script const Test_Path = "cleo\\cleo_test_file.ini" function tests - + before_each(@cleanup) + after_each(@cleanup) + it("should fail on a non-existing file", test1) it("should delete existing file", test2) return + :cleanup + delete_file {path} Test_Path + return + function test1 delete_file {path} "cleo\\not_a_file.ini" // tested opcode assert_result_false() diff --git a/tests/cleo_tests/FilesystemOperations/0B01.txt b/tests/cleo_tests/FilesystemOperations/0B01.txt index 56eea860..587f8372 100644 --- a/tests/cleo_tests/FilesystemOperations/0B01.txt +++ b/tests/cleo_tests/FilesystemOperations/0B01.txt @@ -9,12 +9,19 @@ terminate_this_custom_script const Test_Path = "cleo\\cleo_test_directory" function tests + before_each(@cleanup) + after_each(@cleanup) it("should fail on a non-existing directory", test1) it("should delete empty directory", test2) it("should delete directory with contents", test3) return + :cleanup + set_current_directory {path} 0 + delete_directory {dirPath} Test_Path {recursive} true + return + function test1 delete_directory {dirPath} Test_Path {recursive} false // tested opcode assert_result_false() diff --git a/tests/cleo_tests/FilesystemOperations/0B02.txt b/tests/cleo_tests/FilesystemOperations/0B02.txt index 94b5c6cd..6457790f 100644 --- a/tests/cleo_tests/FilesystemOperations/0B02.txt +++ b/tests/cleo_tests/FilesystemOperations/0B02.txt @@ -10,11 +10,18 @@ const Test_Path_Src = "cleo\\cleo_test_file.ini" const Test_Path_Dst = "_test_file_B.ini" function tests + before_each(@cleanup) + after_each(@cleanup) it("should fail on a non-existing file", test1) it("should move file", test2) return + :cleanup + delete_file {path} Test_Path_Src + delete_file {path} Test_Path_Dst + return + function test1 does_file_exist {dirPath} Test_Path_Src assert_result_false() @@ -42,9 +49,6 @@ function tests 
int value = read_int_from_ini_file {path} Test_Path_Dst {section} "test" {key} "test" assert_eq(value, 42) - - // cleanup - delete_file {fileName} Test_Path_Dst end end diff --git a/tests/cleo_tests/FilesystemOperations/0B03.txt b/tests/cleo_tests/FilesystemOperations/0B03.txt index be094d3b..990649b1 100644 --- a/tests/cleo_tests/FilesystemOperations/0B03.txt +++ b/tests/cleo_tests/FilesystemOperations/0B03.txt @@ -10,11 +10,19 @@ const Test_Path_Src = "cleo\\cleo_test_dir" const Test_Path_Dst = "test_directory" function tests + before_each(@cleanup) + after_each(@cleanup) it("should fail on a non-existing directory", test1) it("should move directory", test2) return + :cleanup + set_current_directory {path} 0 + delete_directory {path} Test_Path_Src + delete_directory {path} Test_Path_Dst + return + function test1 does_directory_exist {dirPath} Test_Path_Src assert_result_false() @@ -57,10 +65,6 @@ function tests assert_result_true() value = read_int_from_ini_file {path} "Test_File.ini" {section} "test" {key} "test" assert_eq(value, 42) - set_current_directory {path} 0 - - // cleanup - delete_directory {dirPath} Test_Path_Dst {recursive} true end end diff --git a/tests/cleo_tests/FilesystemOperations/0B04.txt b/tests/cleo_tests/FilesystemOperations/0B04.txt index 19679aee..0407eeff 100644 --- a/tests/cleo_tests/FilesystemOperations/0B04.txt +++ b/tests/cleo_tests/FilesystemOperations/0B04.txt @@ -10,13 +10,20 @@ const Test_Path_Src = "cleo\\cleo_test_file.ini" const Test_Path_Dst = "_test_file_B.ini" function tests + before_each(@cleanup) + after_each(@cleanup) it("should fail on a non-existing file", test1) it("should copy file", test2) return + :cleanup + delete_file {path} Test_Path_Src + delete_file {path} Test_Path_Dst + return + function test1 - does_file_exist {dirPath} Test_Path_Src + does_file_exist {path} Test_Path_Src assert_result_false() copy_file {path} Test_Path_Src {newPath} Test_Path_Dst // tested opcode @@ -27,9 +34,9 @@ function tests // setup 
write_int_to_ini_file {value} 42 {path} Test_Path_Src {section} "test" {key} "test" assert_result_true() - does_file_exist {dirPath} Test_Path_Src + does_file_exist {path} Test_Path_Src assert_result_true() - does_file_exist {dirPath} Test_Path_Dst + does_file_exist {path} Test_Path_Dst assert_result_false() // act @@ -41,10 +48,6 @@ function tests value = read_int_from_ini_file {path} Test_Path_Dst {section} "test" {key} "test" assert_eq(value, 42) - - // cleanup - delete_file {fileName} Test_Path_Src - delete_file {fileName} Test_Path_Dst end end diff --git a/tests/cleo_tests/FilesystemOperations/0B05.txt b/tests/cleo_tests/FilesystemOperations/0B05.txt index d157602f..b94cf244 100644 --- a/tests/cleo_tests/FilesystemOperations/0B05.txt +++ b/tests/cleo_tests/FilesystemOperations/0B05.txt @@ -10,11 +10,19 @@ const Test_Path_Src = "cleo\\cleo_test_dir" const Test_Path_Dst = "test_directory" function tests + before_each(@cleanup) + after_each(@cleanup) it("should fail on a non-existing directory", test1) it("should move directory", test2) return + :cleanup + set_current_directory {path} 0 + delete_directory {path} Test_Path_Src {recursive} true + delete_directory {path} Test_Path_Dst {recursive} true + return + function test1 does_directory_exist {dirPath} Test_Path_Src assert_result_false() @@ -64,11 +72,6 @@ function tests assert_result_true() value = read_int_from_ini_file {path} "Test_File.ini" {section} "test" {key} "test" assert_eq(value, 42) - set_current_directory {path} 0 - - // cleanup - delete_directory {dirPath} Test_Path_Src {recursive} true - delete_directory {dirPath} Test_Path_Dst {recursive} true end end diff --git a/tests/cleo_tests/IniFiles/0AF0.txt b/tests/cleo_tests/IniFiles/0AF0.txt new file mode 100644 index 00000000..72725e8e --- /dev/null +++ b/tests/cleo_tests/IniFiles/0AF0.txt @@ -0,0 +1,59 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name "0AF0" +test("0AF0 (read_int_from_ini_file)", tests) +terminate_this_custom_script + 
+ +const Test_Path = "cleo\\cleo_test_file.ini" + +function tests + before_each(@setup) + after_each(@cleanup) + + it("should fail on not-existing file", test1) + it("should fail on invalid file", test2) + it("should fail on not existing value", test3) + it("should fail on invalid type", test4) + it("should read value", test5) + + return + + :setup + delete_file {path} Test_Path + write_int_to_ini_file {value} 42 {path} Test_Path {section} "test" {key} "test_int" + write_float_to_ini_file {value} 50.0 {path} Test_Path {section} "test" {key} "test_float" + write_string_to_ini_file {value} "value_one" {path} Test_Path {section} "test" {key} "test_string" + return + + :cleanup + delete_file {path} Test_Path + return + + function test1 + int value = read_int_from_ini_file {path} "not_a_file.ini" {section} "test" {key} "test_int" + assert_result_false() + end + + function test2 + int value = read_int_from_ini_file {path} "cleo.asi" {section} "test" {key} "test_int" + assert_result_false() + end + + function test3 + int value = read_int_from_ini_file {path} Test_Path {section} "test" {key} "invalid_key" + assert_result_false() + end + + function test4 + int value = read_int_from_ini_file {path} Test_Path {section} "test" {key} "test_string" + assert_result_false() + end + + function test5 + int value = read_int_from_ini_file {path} Test_Path {section} "test" {key} "test_int" + assert_result_true() + assert_eq(value, 42) + end +end diff --git a/tests/cleo_tests/IniFiles/0AF1.txt b/tests/cleo_tests/IniFiles/0AF1.txt new file mode 100644 index 00000000..ee8a6578 --- /dev/null +++ b/tests/cleo_tests/IniFiles/0AF1.txt @@ -0,0 +1,81 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name "0AF1" +test("0AF1 (write_int_to_ini_file)", tests) +terminate_this_custom_script + + +const Test_Path = "cleo\\cleo_test_file.ini" + +function tests + before_each(@cleanup) + after_each(@cleanup) + + it("should fail to overwrite file", test1) + it("should fail to overwrite 
directory", test2) + it("should create new file", test3) + it("should append to existing file", test4) + it("should overwrite value", test5) + return + + :cleanup + delete_file {path} Test_Path + return + + function test1 + write_int_to_ini_file {value} 42 {path} "gta_sa.exe" {section} "test" {key} "test" + assert_result_false() + end + + function test2 + write_int_to_ini_file {value} 42 {path} "cleo" {section} "test" {key} "test" + assert_result_false() + end + + function test3 + does_file_exist {path} Test_Path + assert_result_false() + + write_int_to_ini_file {value} 42 {path} Test_Path {section} "test" {key} "test" + assert_result_true() + + does_file_exist {path} Test_Path + assert_result_true() + + int value = read_int_from_ini_file {path} Test_Path {section} "test" {key} "test" + assert_eq(value, 42) + end + + function test4 + does_file_exist {path} Test_Path + assert_result_false() + + write_int_to_ini_file {value} 42 {path} Test_Path {section} "test" {key} "testA" + assert_result_true() + + write_int_to_ini_file {value} 50 {path} Test_Path {section} "test" {key} "testB" + assert_result_true() + + int value = read_int_from_ini_file {path} Test_Path {section} "test" {key} "testA" + assert_eq(value, 42) + + value = read_int_from_ini_file {path} Test_Path {section} "test" {key} "testB" + assert_eq(value, 50) + end + + function test5 + does_file_exist {path} Test_Path + assert_result_false() + + write_int_to_ini_file {value} 42 {path} Test_Path {section} "test" {key} "test" + assert_result_true() + + write_int_to_ini_file {value} 50 {path} Test_Path {section} "test" {key} "test" + assert_result_true() + + int value = read_int_from_ini_file {path} Test_Path {section} "test" {key} "test" + assert_eq(value, 50) + end + +end diff --git a/tests/cleo_tests/IniFiles/0AF2.txt b/tests/cleo_tests/IniFiles/0AF2.txt new file mode 100644 index 00000000..7121be4f --- /dev/null +++ b/tests/cleo_tests/IniFiles/0AF2.txt @@ -0,0 +1,59 @@ +{$CLEO .s} +{$INCLUDE_ONCE 
../cleo_tester.inc} + +script_name "0AF2" +test("0AF2 (read_float_from_ini_file)", tests) +terminate_this_custom_script + + +const Test_Path = "cleo\\cleo_test_file.ini" + +function tests + before_each(@setup) + after_each(@cleanup) + + it("should fail on not-existing file", test1) + it("should fail on invalid file", test2) + it("should fail on not existing value", test3) + it("should fail on invalid type", test4) + it("should read value", test5) + + return + + :setup + delete_file {path} Test_Path + write_int_to_ini_file {value} 42 {path} Test_Path {section} "test" {key} "test_int" + write_float_to_ini_file {value} 50.0 {path} Test_Path {section} "test" {key} "test_float" + write_string_to_ini_file {value} "value_one" {path} Test_Path {section} "test" {key} "test_string" + return + + :cleanup + delete_file {path} Test_Path + return + + function test1 + float value = read_float_from_ini_file {path} "not_a_file.ini" {section} "test" {key} "test_float" + assert_result_false() + end + + function test2 + float value = read_float_from_ini_file {path} "cleo.asi" {section} "test" {key} "test_float" + assert_result_false() + end + + function test3 + float value = read_float_from_ini_file {path} Test_Path {section} "test" {key} "invalid_key" + assert_result_false() + end + + function test4 + float value = read_float_from_ini_file {path} Test_Path {section} "test" {key} "test_string" + assert_result_false() + end + + function test5 + float value = read_float_from_ini_file {path} Test_Path {section} "test" {key} "test_float" + assert_result_true() + assert_eqf(value, 50.0) + end +end diff --git a/tests/cleo_tests/IniFiles/0AF3.txt b/tests/cleo_tests/IniFiles/0AF3.txt new file mode 100644 index 00000000..56ed769b --- /dev/null +++ b/tests/cleo_tests/IniFiles/0AF3.txt @@ -0,0 +1,81 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name "0AF3" +test("0AF3 (write_float_to_ini_file)", tests) +terminate_this_custom_script + + +const Test_Path = "cleo\\cleo_test_file.ini" 
+ +function tests + before_each(@cleanup) + after_each(@cleanup) + + it("should fail to overwrite file", test1) + it("should fail to overwrite directory", test2) + it("should create new file", test3) + it("should append to existing file", test4) + it("should overwrite value", test5) + return + + :cleanup + delete_file {path} Test_Path + return + + function test1 + write_float_to_ini_file {value} 42.0 {path} "gta_sa.exe" {section} "test" {key} "test" + assert_result_false() + end + + function test2 + write_float_to_ini_file {value} 42.0 {path} "cleo" {section} "test" {key} "test" + assert_result_false() + end + + function test3 + does_file_exist {path} Test_Path + assert_result_false() + + write_float_to_ini_file {value} 42.0 {path} Test_Path {section} "test" {key} "test" + assert_result_true() + + does_file_exist {path} Test_Path + assert_result_true() + + int value = read_float_from_ini_file {path} Test_Path {section} "test" {key} "test" + assert_eqf(value, 42.0) + end + + function test4 + does_file_exist {path} Test_Path + assert_result_false() + + write_float_to_ini_file {value} 42.0 {path} Test_Path {section} "test" {key} "testA" + assert_result_true() + + write_float_to_ini_file {value} 50.0 {path} Test_Path {section} "test" {key} "testB" + assert_result_true() + + int value = read_float_from_ini_file {path} Test_Path {section} "test" {key} "testA" + assert_eqf(value, 42.0) + + value = read_float_from_ini_file {path} Test_Path {section} "test" {key} "testB" + assert_eqf(value, 50.0) + end + + function test5 + does_file_exist {path} Test_Path + assert_result_false() + + write_float_to_ini_file {value} 42.0 {path} Test_Path {section} "test" {key} "test" + assert_result_true() + + write_float_to_ini_file {value} 50.0 {path} Test_Path {section} "test" {key} "test" + assert_result_true() + + int value = read_float_from_ini_file {path} Test_Path {section} "test" {key} "test" + assert_eqf(value, 50.0) + end + +end diff --git a/tests/cleo_tests/IniFiles/0AF4.txt 
b/tests/cleo_tests/IniFiles/0AF4.txt new file mode 100644 index 00000000..5bf2ca55 --- /dev/null +++ b/tests/cleo_tests/IniFiles/0AF4.txt @@ -0,0 +1,53 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name "0AF4" +test("0AF4 (read_string_from_ini_file)", tests) +terminate_this_custom_script + + +const Test_Path = "cleo\\cleo_test_file.ini" + +function tests + before_each(@setup) + after_each(@cleanup) + + it("should fail on not-existing file", test1) + it("should fail on invalid file", test2) + it("should fail on not existing value", test3) + it("should read value", test4) + + return + + :setup + delete_file {path} Test_Path + write_int_to_ini_file {value} 42 {path} Test_Path {section} "test" {key} "test_int" + write_float_to_ini_file {value} 50.0 {path} Test_Path {section} "test" {key} "test_float" + write_string_to_ini_file {value} "value_one" {path} Test_Path {section} "test" {key} "test_string" + return + + :cleanup + delete_file {path} Test_Path + return + + function test1 + longstring value = read_string_from_ini_file {path} "not_a_file.ini" {section} "test" {key} "test_string" + assert_result_false() + end + + function test2 + longstring value = read_string_from_ini_file {path} "cleo.asi" {section} "test" {key} "test_string" + assert_result_false() + end + + function test3 + longstring value = read_string_from_ini_file {path} Test_Path {section} "test" {key} "invalid_key" + assert_result_false() + end + + function test4 + longstring value = read_string_from_ini_file {path} Test_Path {section} "test" {key} "test_string" + assert_result_true() + assert_eqs(value, "value_one") + end +end diff --git a/tests/cleo_tests/IniFiles/0AF5.txt b/tests/cleo_tests/IniFiles/0AF5.txt new file mode 100644 index 00000000..69d27128 --- /dev/null +++ b/tests/cleo_tests/IniFiles/0AF5.txt @@ -0,0 +1,81 @@ +{$CLEO .s} +{$INCLUDE_ONCE ../cleo_tester.inc} + +script_name "0AF5" +test("0AF5 (write_string_to_ini_file)", tests) +terminate_this_custom_script + + +const 
Test_Path = "cleo\\cleo_test_file.ini" + +function tests + before_each(@cleanup) + after_each(@cleanup) + + it("should fail to overwrite file", test1) + it("should fail to overwrite directory", test2) + it("should create new file", test3) + it("should append to existing file", test4) + it("should overwrite value", test5) + return + + :cleanup + delete_file {path} Test_Path + return + + function test1 + write_string_to_ini_file {value} "value_one" {path} "gta_sa.exe" {section} "test" {key} "test" + assert_result_false() + end + + function test2 + write_string_to_ini_file {value} "value_one" {path} "cleo" {section} "test" {key} "test" + assert_result_false() + end + + function test3 + does_file_exist {path} Test_Path + assert_result_false() + + write_string_to_ini_file {value} "value_one" {path} Test_Path {section} "test" {key} "test" + assert_result_true() + + does_file_exist {path} Test_Path + assert_result_true() + + longstring value = read_string_from_ini_file {path} Test_Path {section} "test" {key} "test" + assert_eqs(value, "value_one") + end + + function test4 + does_file_exist {path} Test_Path + assert_result_false() + + write_string_to_ini_file {value} "value_one" {path} Test_Path {section} "test" {key} "testA" + assert_result_true() + + write_string_to_ini_file {value} "value_two" {path} Test_Path {section} "test" {key} "testB" + assert_result_true() + + longstring value = read_string_from_ini_file {path} Test_Path {section} "test" {key} "testA" + assert_eqs(value, "value_one") + + value = read_string_from_ini_file {path} Test_Path {section} "test" {key} "testB" + assert_eqs(value, "value_two") + end + + function test5 + does_file_exist {path} Test_Path + assert_result_false() + + write_string_to_ini_file {value} "value_one" {path} Test_Path {section} "test" {key} "test" + assert_result_true() + + write_string_to_ini_file {value} "value_two" {path} Test_Path {section} "test" {key} "test" + assert_result_true() + + longstring value = 
read_string_from_ini_file {path} Test_Path {section} "test" {key} "test" + assert_eqs(value, "value_two") + end + +end From 397714806f26b48d2275e953ed13904901b36b92 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Mon, 30 Sep 2024 21:22:47 +0200 Subject: [PATCH 210/216] Unit tests log style updates. (#219) * Unit tests log style updates. * fixup! Unit tests log style updates. --- tests/.cleo_tests_runner.txt | 5 +++-- tests/cleo_tests/.Compile_All.bat | 9 +++++++-- tests/cleo_tests/cleo_tester.inc | 29 +++++++++++++++-------------- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/tests/.cleo_tests_runner.txt b/tests/.cleo_tests_runner.txt index d0657f08..2ce4efc8 100644 --- a/tests/.cleo_tests_runner.txt +++ b/tests/.cleo_tests_runner.txt @@ -19,7 +19,8 @@ terminate_this_custom_script function RUN_TESTS_DIR(basePath :string, directory :string) - trace "~w~Testing module '%s'" directory + trace "" // separator + trace "~y~~h~-------- Testing module '~s~~h~%s~y~~h~' --------" directory // process all test files int str = allocate_memory 260 @@ -47,7 +48,7 @@ function RUN_TESTS_DIR(basePath :string, directory :string) find_close searchHandle else - trace "~r~No tests found!" + trace "~r~~h~~h~No tests found!" end // process all sub directories diff --git a/tests/cleo_tests/.Compile_All.bat b/tests/cleo_tests/.Compile_All.bat index de4ec11c..b2e5a581 100644 --- a/tests/cleo_tests/.Compile_All.bat +++ b/tests/cleo_tests/.Compile_All.bat @@ -7,18 +7,23 @@ SETLOCAL EnableDelayedExpansion @REM Delete all .s files in the current directory and subdirectories for /f "delims=" %%i in ('dir /b /s *.s') do ( set p=%%i - echo Deleting !p:%__CD__%=!... + echo Deleting !p:%__CD__%=! del "%%i" ) +echo. @REM Compile all .txt files in the current directory and subdirectories for /f "delims=" %%i in ('dir /b /s *.txt') do ( if not "%%~nxi" == "cleo_tester.txt" ( set p=%%i - echo Compiling !p:%__CD__%=!... + echo Compiling !p:%__CD__%=! 
%SANNY% --compile "%%i" "%%~dpni.s" --no-splash --mode sa_sbl + if not exist "%%~dpni.s" ( + echo ERROR: Failed to build !p:%__CD__%=! + ) ) ) +echo. echo Done. pause diff --git a/tests/cleo_tests/cleo_tester.inc b/tests/cleo_tests/cleo_tester.inc index 5dbe36ea..6b8c0c62 100644 --- a/tests/cleo_tests/cleo_tester.inc +++ b/tests/cleo_tests/cleo_tester.inc @@ -27,7 +27,8 @@ function test(suite_name: string, callback: int) int suite_name_buf = get_label_pointer @_cleo_tester_test_name copy_memory {src} suite_name {dest} suite_name_buf {size} 255 // used in an it trace - trace "~w~Testing %s" suite_name + trace "" // separator + trace "Testing %s" suite_name _cleo_tester_write_var(VAR_BEFORE_EACH, @_cleo_tester_stub) _cleo_tester_write_var(VAR_AFTER_EACH, @_cleo_tester_stub) @@ -43,7 +44,7 @@ function it(spec_name: string, callback: int) int spec_name_buf = get_label_pointer @_cleo_tester_spec_name copy_memory {src} spec_name {dest} spec_name_buf {size} 255 // used in a failed assert int test_name = get_label_pointer @_cleo_tester_test_name - trace "Test #%d %s" index spec_name + trace "~s~Test #%d %s" index spec_name wait 0 _cleo_tester_write_var(VAR_SPEC, callback) @@ -110,7 +111,7 @@ function _cleo_tester_fail int test_index = _cleo_tester_read_var(VAR_TEST_INDEX) int test_name = get_label_pointer @_cleo_tester_spec_name int assert_index = _cleo_tester_read_var(VAR_ASSERT_INDEX) - trace "~r~~h~~h~~h~Test #%d Assert #%d FAILED!" test_index assert_index + trace "~r~~h~~h~Test #%d ~p~~h~Assert #%d~r~~h~~h~ FAILED!" 
test_index assert_index end function _cleo_tester_increment_assert @@ -126,7 +127,7 @@ function assert_true(flag: int) flag == false then _cleo_tester_fail() - trace "TRUE expected~n~%d occured" flag + trace "~g~~h~~h~TRUE~s~ expected~n~~r~~h~~h~%d~s~ occured" flag breakpoint terminate_this_custom_script end @@ -139,7 +140,7 @@ function assert_false(flag: int) flag <> false then _cleo_tester_fail() - trace "FALSE expected~n~%d occured" flag + trace "~g~~h~~h~FALSE~s~ expected~n~~r~~h~~h~%d~s~ occured" flag breakpoint terminate_this_custom_script end @@ -154,7 +155,7 @@ end :_assert_result_true _cleo_tester_increment_assert() _cleo_tester_fail() - trace "Condition result is FALSE, expected TRUE" + trace "~s~Condition result is ~r~~h~~h~FALSE~s~, expected ~g~~h~~h~TRUE~s~" breakpoint terminate_this_custom_script return @@ -165,7 +166,7 @@ return _cleo_tester_increment_assert() _cleo_tester_fail() - trace "Condition result is TRUE, expected FALSE" + trace "~s~Condition result is ~r~~h~~h~TRUE~s~, expected ~g~~h~~h~FALSE~s~" breakpoint terminate_this_custom_script @@ -180,7 +181,7 @@ function assert_eq(actual: int, expected: int) actual <> expected then _cleo_tester_fail() - trace "%08X expected~n~%08X occured" expected actual + trace "~g~~h~~h~%08X~s~ expected~n~~r~~h~~h~%08X~s~ occured" expected actual breakpoint terminate_this_custom_script end @@ -192,7 +193,7 @@ function assert_neq(actual: int, expected: int) actual == expected then _cleo_tester_fail() - trace "Expected value different than %08X" actual + trace "~s~Expected value different than ~r~~h~~h~%08X~s~" actual breakpoint terminate_this_custom_script end @@ -205,7 +206,7 @@ function assert_range(actual: int, expectedMin: int, expectedMax: int) actual < expectedMin then _cleo_tester_fail() - trace "%08X to %08X expected~n~%08X occured" expectedMin expectedMax actual + trace "~g~~h~~h~%08X to %08X~s~ expected~n~~r~~h~~h~%08X~s~ occured" expectedMin expectedMax actual breakpoint terminate_this_custom_script 
end @@ -218,7 +219,7 @@ function assert_eqf(actual: float, expected: float) actual <> expected then _cleo_tester_fail() - trace "%f expected~n~%f occured" expected actual + trace "~g~~h~~h~%f~s~ expected~n~~r~~h~~h~%f~s~ occured" expected actual breakpoint terminate_this_custom_script end @@ -231,7 +232,7 @@ function assert_neqf(actual: float, expected: float) actual == expected then _cleo_tester_fail() - trace "Expected value different than %f" actual + trace "~s~Expected value different than ~r~~h~~h~%f~s~" actual breakpoint terminate_this_custom_script end @@ -269,7 +270,7 @@ function assert_eqs(actual: string, expected: string) not is_text_equal {text} actual {another} expected {ignoreCase} false then _cleo_tester_fail() - trace "`%s` expected~n~`%s` occured" expected actual + trace "`~g~~h~~h~%s~s~` expected~n~`~r~~h~~h~%s~s~` occured" expected actual breakpoint terminate_this_custom_script end @@ -282,7 +283,7 @@ function assert_neqs(actual: string, expected: string) is_text_equal {text} actual {another} expected {ignoreCase} false then _cleo_tester_fail() - trace "Expected value different than `%s`" actual + trace "~s~Expected value different than `~r~~h~~h~%s~s~`" actual breakpoint terminate_this_custom_script end From edb8f5df3735405a4fdd530a20fda1d77165ca7b Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Mon, 30 Sep 2024 21:31:34 +0200 Subject: [PATCH 211/216] File handling opcodes fixes (#218) * Fix move_file and move_directory opcodes. 
* Fixed copy_directory --- .../FileSystemOperations.cpp | 89 +++++++------------ 1 file changed, 30 insertions(+), 59 deletions(-) diff --git a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp index 04acfa8d..567b4dd9 100644 --- a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp @@ -3,10 +3,12 @@ #include "CLEO_Utils.h" #include "FileUtils.h" +#include #include using namespace CLEO; using namespace plugin; +namespace FS = std::filesystem; #define OPCODE_READ_PARAM_FILE_HANDLE(handle) auto handle = (DWORD)OPCODE_READ_PARAM_PTR(); \ if(m_hFiles.find(handle) == m_hFiles.end()) { auto info = ScriptInfoStr(thread); SHOW_ERROR("Invalid or already closed '0x%X' file handle param in script %s \nScript suspended.", handle, info.c_str()); return thread->Suspend(); } @@ -556,9 +558,15 @@ class FileSystemOperations OPCODE_READ_PARAM_FILEPATH(filepath); OPCODE_READ_PARAM_FILEPATH(newFilepath); - BOOL result = GetFileAttributes(filepath) & FILE_ATTRIBUTE_DIRECTORY; - if (!result) - result = MoveFile(filepath, newFilepath); + bool result = false; + + auto fsPath = FS::path(filepath); + if (FS::is_regular_file(fsPath)) + { + std::error_code err; + FS::rename(fsPath, newFilepath, err); + result = !err; + } OPCODE_CONDITION_RESULT(result); return OR_CONTINUE; @@ -570,9 +578,15 @@ class FileSystemOperations OPCODE_READ_PARAM_FILEPATH(filepath); OPCODE_READ_PARAM_FILEPATH(newFilepath); - BOOL result = GetFileAttributes(filepath) & FILE_ATTRIBUTE_DIRECTORY; - if (result) - result = MoveFile(filepath, newFilepath); + bool result = false; + + auto fsPath = FS::path(filepath); + if (FS::is_directory(fsPath)) + { + std::error_code err; + FS::rename(fsPath, newFilepath, err); + result = !err; + } OPCODE_CONDITION_RESULT(result); return OR_CONTINUE; @@ -596,66 +610,23 @@ class FileSystemOperations return OR_CONTINUE; } - static BOOL CopyDir(const 
char *path, const char *newPath) - { - char mask[MAX_PATH]; - HANDLE hSearch = NULL; - WIN32_FIND_DATA wfd; - char subPath[MAX_PATH], newSubPath[MAX_PATH]; - DWORD fattr; - - //create parent directory - if (!CreateDirectory(newPath, NULL)) - return FALSE; - - memset(&wfd, 0, sizeof(wfd)); - //search mask - sprintf(mask, "%s\\*", path); - - //copy all files and folders into new directory - if ((hSearch = FindFirstFile(mask, &wfd)) != INVALID_HANDLE_VALUE) - { - do - { - sprintf(subPath, "%s\\%s", path, wfd.cFileName); - sprintf(newSubPath, "%s\\%s", newPath, wfd.cFileName); - //copy subdirectories - if (wfd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) - { - if ((strcmp(wfd.cFileName, "..") != 0) && (strcmp(wfd.cFileName, ".") != 0)) - { - if (!CopyDir(subPath, newSubPath)) - return FALSE; - } - } - else - { - //copy file into new directory - if (CopyFile(subPath, newSubPath, FALSE)) - { - fattr = GetFileAttributes(subPath); - SetFileAttributes(newSubPath, fattr); - } - else return FALSE; - } - - - } while (FindNextFile(hSearch, &wfd)); - FindClose(hSearch); - } - - return TRUE; - } - // 0B05=2, copy_directory %1d% to %2d% //IF and SET static OpcodeResult WINAPI Script_FS_CopyDir(CScriptThread* thread) { OPCODE_READ_PARAM_FILEPATH(filepath); OPCODE_READ_PARAM_FILEPATH(newFilepath); - BOOL result = CopyDir(filepath, newFilepath); + auto path = FS::path(filepath); + if (!FS::is_directory(path)) + { + OPCODE_CONDITION_RESULT(false); + return OR_CONTINUE; + } - OPCODE_CONDITION_RESULT(result); + std::error_code err; + FS::copy(filepath, newFilepath, FS::copy_options::update_existing | FS::copy_options::recursive, err); + + OPCODE_CONDITION_RESULT(!err); return OR_CONTINUE; } From ac451eea6aa96d9f4dd90d0194b499ffc9b89e4f Mon Sep 17 00:00:00 2001 From: Miran Date: Tue, 1 Oct 2024 09:51:24 +0200 Subject: [PATCH 212/216] Remove doubled separators during filepath normalization. 
--- cleo_sdk/CLEO_Utils.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cleo_sdk/CLEO_Utils.h b/cleo_sdk/CLEO_Utils.h index f12e3812..3555d8e6 100644 --- a/cleo_sdk/CLEO_Utils.h +++ b/cleo_sdk/CLEO_Utils.h @@ -130,6 +130,14 @@ namespace CLEO if (path.empty()) return; std::replace(path.begin(), path.end(), '/', '\\'); + + // remove doubled separators + size_t pos; + while ((pos = path.find("\\\\")) != std::string::npos) + { + path.erase(pos, 1); + } + if (normalizeCase) std::transform(path.begin(), path.end(), path.begin(), [](unsigned char c) { return tolower(c); }); // to lower case // collapse references to parent directory From b0a772e5d91cb4b0a8a949e375db1ca457d13e89 Mon Sep 17 00:00:00 2001 From: Miran Date: Tue, 1 Oct 2024 09:52:42 +0200 Subject: [PATCH 213/216] Fix doubled filepath separators in unit tests. --- tests/cleo_tests/FilesystemOperations/0A9A.txt | 4 ++-- tests/cleo_tests/FilesystemOperations/0AAB.txt | 4 ++-- tests/cleo_tests/FilesystemOperations/0AE4.txt | 4 ++-- tests/cleo_tests/FilesystemOperations/0AE5.txt | 2 +- tests/cleo_tests/FilesystemOperations/0B00.txt | 4 ++-- tests/cleo_tests/FilesystemOperations/0B01.txt | 2 +- tests/cleo_tests/FilesystemOperations/0B02.txt | 2 +- tests/cleo_tests/FilesystemOperations/0B03.txt | 2 +- tests/cleo_tests/FilesystemOperations/0B04.txt | 2 +- tests/cleo_tests/FilesystemOperations/0B05.txt | 2 +- tests/cleo_tests/IniFiles/0AF0.txt | 2 +- tests/cleo_tests/IniFiles/0AF1.txt | 2 +- tests/cleo_tests/IniFiles/0AF2.txt | 2 +- tests/cleo_tests/IniFiles/0AF3.txt | 2 +- tests/cleo_tests/IniFiles/0AF4.txt | 2 +- tests/cleo_tests/IniFiles/0AF5.txt | 2 +- 16 files changed, 20 insertions(+), 20 deletions(-) diff --git a/tests/cleo_tests/FilesystemOperations/0A9A.txt b/tests/cleo_tests/FilesystemOperations/0A9A.txt index a911c7ec..82d20ce8 100644 --- a/tests/cleo_tests/FilesystemOperations/0A9A.txt +++ b/tests/cleo_tests/FilesystemOperations/0A9A.txt @@ -13,13 +13,13 @@ function tests return function test1 - 
0@ = open_file "cleo\\not_a_file.txt" {mode} "r" // tested opcode + 0@ = open_file "cleo\not_a_file.txt" {mode} "r" // tested opcode assert_result_false() end function test2 if - 0@ = open_file "cleo\\.cleo_config.ini" {mode} "r" // tested opcode + 0@ = open_file "cleo\.cleo_config.ini" {mode} "r" // tested opcode then assert(true) close_file 0@ diff --git a/tests/cleo_tests/FilesystemOperations/0AAB.txt b/tests/cleo_tests/FilesystemOperations/0AAB.txt index 5d4c16be..4f53c072 100644 --- a/tests/cleo_tests/FilesystemOperations/0AAB.txt +++ b/tests/cleo_tests/FilesystemOperations/0AAB.txt @@ -13,12 +13,12 @@ function tests return function test1 - does_file_exist {path} "cleo\\not_a_file.txt" // tested opcode + does_file_exist {path} "cleo\not_a_file.txt" // tested opcode assert_result_false() end function test2 - does_file_exist {path} "cleo\\.cleo_config.ini" // tested opcode + does_file_exist {path} "cleo\.cleo_config.ini" // tested opcode assert_result_true() end diff --git a/tests/cleo_tests/FilesystemOperations/0AE4.txt b/tests/cleo_tests/FilesystemOperations/0AE4.txt index 55ff5fd9..7df2d1a2 100644 --- a/tests/cleo_tests/FilesystemOperations/0AE4.txt +++ b/tests/cleo_tests/FilesystemOperations/0AE4.txt @@ -13,12 +13,12 @@ function tests return function test1 - does_directory_exist {path} "cleo\\not_a_directory" // tested opcode + does_directory_exist {path} "cleo\not_a_directory" // tested opcode assert_result_false() end function test2 - does_directory_exist {path} "cleo\\cleo_tests" // tested opcode + does_directory_exist {path} "cleo\cleo_tests" // tested opcode assert_result_true() end diff --git a/tests/cleo_tests/FilesystemOperations/0AE5.txt b/tests/cleo_tests/FilesystemOperations/0AE5.txt index 37e74600..80cce88b 100644 --- a/tests/cleo_tests/FilesystemOperations/0AE5.txt +++ b/tests/cleo_tests/FilesystemOperations/0AE5.txt @@ -6,7 +6,7 @@ test("0AE5 (create_directory)", tests) terminate_this_custom_script -const Test_Path = "cleo\\cleo_test_directory" 
+const Test_Path = "cleo\cleo_test_directory" function tests before_each(@cleanup) diff --git a/tests/cleo_tests/FilesystemOperations/0B00.txt b/tests/cleo_tests/FilesystemOperations/0B00.txt index 29413007..1505358b 100644 --- a/tests/cleo_tests/FilesystemOperations/0B00.txt +++ b/tests/cleo_tests/FilesystemOperations/0B00.txt @@ -6,7 +6,7 @@ test("0B00 (delete_file)", tests) terminate_this_custom_script -const Test_Path = "cleo\\cleo_test_file.ini" +const Test_Path = "cleo\cleo_test_file.ini" function tests before_each(@cleanup) @@ -21,7 +21,7 @@ function tests return function test1 - delete_file {path} "cleo\\not_a_file.ini" // tested opcode + delete_file {path} "cleo\not_a_file.ini" // tested opcode assert_result_false() end diff --git a/tests/cleo_tests/FilesystemOperations/0B01.txt b/tests/cleo_tests/FilesystemOperations/0B01.txt index 587f8372..b9fe2fb6 100644 --- a/tests/cleo_tests/FilesystemOperations/0B01.txt +++ b/tests/cleo_tests/FilesystemOperations/0B01.txt @@ -6,7 +6,7 @@ test("0B01 (delete_directory)", tests) terminate_this_custom_script -const Test_Path = "cleo\\cleo_test_directory" +const Test_Path = "cleo\cleo_test_directory" function tests before_each(@cleanup) diff --git a/tests/cleo_tests/FilesystemOperations/0B02.txt b/tests/cleo_tests/FilesystemOperations/0B02.txt index 6457790f..43077dfa 100644 --- a/tests/cleo_tests/FilesystemOperations/0B02.txt +++ b/tests/cleo_tests/FilesystemOperations/0B02.txt @@ -6,7 +6,7 @@ test("0B02 (move_file)", tests) terminate_this_custom_script -const Test_Path_Src = "cleo\\cleo_test_file.ini" +const Test_Path_Src = "cleo\cleo_test_file.ini" const Test_Path_Dst = "_test_file_B.ini" function tests diff --git a/tests/cleo_tests/FilesystemOperations/0B03.txt b/tests/cleo_tests/FilesystemOperations/0B03.txt index 990649b1..56b05f77 100644 --- a/tests/cleo_tests/FilesystemOperations/0B03.txt +++ b/tests/cleo_tests/FilesystemOperations/0B03.txt @@ -6,7 +6,7 @@ test("0B03 (move_directory)", tests) 
terminate_this_custom_script -const Test_Path_Src = "cleo\\cleo_test_dir" +const Test_Path_Src = "cleo\cleo_test_dir" const Test_Path_Dst = "test_directory" function tests diff --git a/tests/cleo_tests/FilesystemOperations/0B04.txt b/tests/cleo_tests/FilesystemOperations/0B04.txt index 0407eeff..aa4a1cde 100644 --- a/tests/cleo_tests/FilesystemOperations/0B04.txt +++ b/tests/cleo_tests/FilesystemOperations/0B04.txt @@ -6,7 +6,7 @@ test("0B04 (copy_file)", tests) terminate_this_custom_script -const Test_Path_Src = "cleo\\cleo_test_file.ini" +const Test_Path_Src = "cleo\cleo_test_file.ini" const Test_Path_Dst = "_test_file_B.ini" function tests diff --git a/tests/cleo_tests/FilesystemOperations/0B05.txt b/tests/cleo_tests/FilesystemOperations/0B05.txt index b94cf244..9f093302 100644 --- a/tests/cleo_tests/FilesystemOperations/0B05.txt +++ b/tests/cleo_tests/FilesystemOperations/0B05.txt @@ -6,7 +6,7 @@ test("0B05 (copy_directory)", tests) terminate_this_custom_script -const Test_Path_Src = "cleo\\cleo_test_dir" +const Test_Path_Src = "cleo\cleo_test_dir" const Test_Path_Dst = "test_directory" function tests diff --git a/tests/cleo_tests/IniFiles/0AF0.txt b/tests/cleo_tests/IniFiles/0AF0.txt index 72725e8e..ca3a9521 100644 --- a/tests/cleo_tests/IniFiles/0AF0.txt +++ b/tests/cleo_tests/IniFiles/0AF0.txt @@ -6,7 +6,7 @@ test("0AF0 (read_int_from_ini_file)", tests) terminate_this_custom_script -const Test_Path = "cleo\\cleo_test_file.ini" +const Test_Path = "cleo\cleo_test_file.ini" function tests before_each(@setup) diff --git a/tests/cleo_tests/IniFiles/0AF1.txt b/tests/cleo_tests/IniFiles/0AF1.txt index ee8a6578..73bb5933 100644 --- a/tests/cleo_tests/IniFiles/0AF1.txt +++ b/tests/cleo_tests/IniFiles/0AF1.txt @@ -6,7 +6,7 @@ test("0AF1 (write_int_to_ini_file)", tests) terminate_this_custom_script -const Test_Path = "cleo\\cleo_test_file.ini" +const Test_Path = "cleo\cleo_test_file.ini" function tests before_each(@cleanup) diff --git 
a/tests/cleo_tests/IniFiles/0AF2.txt b/tests/cleo_tests/IniFiles/0AF2.txt index 7121be4f..e4f88840 100644 --- a/tests/cleo_tests/IniFiles/0AF2.txt +++ b/tests/cleo_tests/IniFiles/0AF2.txt @@ -6,7 +6,7 @@ test("0AF2 (read_float_from_ini_file)", tests) terminate_this_custom_script -const Test_Path = "cleo\\cleo_test_file.ini" +const Test_Path = "cleo\cleo_test_file.ini" function tests before_each(@setup) diff --git a/tests/cleo_tests/IniFiles/0AF3.txt b/tests/cleo_tests/IniFiles/0AF3.txt index 56ed769b..a6b09555 100644 --- a/tests/cleo_tests/IniFiles/0AF3.txt +++ b/tests/cleo_tests/IniFiles/0AF3.txt @@ -6,7 +6,7 @@ test("0AF3 (write_float_to_ini_file)", tests) terminate_this_custom_script -const Test_Path = "cleo\\cleo_test_file.ini" +const Test_Path = "cleo\cleo_test_file.ini" function tests before_each(@cleanup) diff --git a/tests/cleo_tests/IniFiles/0AF4.txt b/tests/cleo_tests/IniFiles/0AF4.txt index 5bf2ca55..60fa091a 100644 --- a/tests/cleo_tests/IniFiles/0AF4.txt +++ b/tests/cleo_tests/IniFiles/0AF4.txt @@ -6,7 +6,7 @@ test("0AF4 (read_string_from_ini_file)", tests) terminate_this_custom_script -const Test_Path = "cleo\\cleo_test_file.ini" +const Test_Path = "cleo\cleo_test_file.ini" function tests before_each(@setup) diff --git a/tests/cleo_tests/IniFiles/0AF5.txt b/tests/cleo_tests/IniFiles/0AF5.txt index 69d27128..15eaf3dd 100644 --- a/tests/cleo_tests/IniFiles/0AF5.txt +++ b/tests/cleo_tests/IniFiles/0AF5.txt @@ -6,7 +6,7 @@ test("0AF5 (write_string_to_ini_file)", tests) terminate_this_custom_script -const Test_Path = "cleo\\cleo_test_file.ini" +const Test_Path = "cleo\cleo_test_file.ini" function tests before_each(@cleanup) From 4dfbfb2bfde021563958373d5c1db1bb8f8f3aad Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Wed, 2 Oct 2024 08:12:53 +0200 Subject: [PATCH 214/216] Flashing of debug break message. 
(#220) --- cleo_plugins/DebugUtils/DebugUtils.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cleo_plugins/DebugUtils/DebugUtils.cpp b/cleo_plugins/DebugUtils/DebugUtils.cpp index fffe4425..f96e700b 100644 --- a/cleo_plugins/DebugUtils/DebugUtils.cpp +++ b/cleo_plugins/DebugUtils/DebugUtils.cpp @@ -97,7 +97,8 @@ class DebugUtils screenLog.Draw(); // draw active breakpoints list - if(!pausedScripts.empty()) + if (!pausedScripts.empty() && + (CTimer::m_FrameCounter & 0xE) != 0) // flashing { for (size_t i = 0; i < pausedScripts.size(); i++) { From a669995822e05108bad48e52b11e1328b35ce0e7 Mon Sep 17 00:00:00 2001 From: MiranDMC Date: Sat, 5 Oct 2024 13:56:09 +0200 Subject: [PATCH 215/216] Fix deadlock inside BASS library finalization. (#223) * Fix deadlock inside BASS library finalization. * fixup! Fix deadlock inside BASS library finalization. --- cleo_plugins/Audio/CSoundSystem.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cleo_plugins/Audio/CSoundSystem.cpp b/cleo_plugins/Audio/CSoundSystem.cpp index 906babf6..8c7171f7 100644 --- a/cleo_plugins/Audio/CSoundSystem.cpp +++ b/cleo_plugins/Audio/CSoundSystem.cpp @@ -51,8 +51,8 @@ namespace CLEO if (initialized) { - TRACE("Freeing BASS library"); - BASS_Free(); + //TRACE("Freeing BASS library"); + //std::thread(BASS_Free); // causes deadlock with ModLoader initialized = false; } TRACE("SoundSystem finalized"); From 0f11a6625df8831c7c62420631a92bcde65459fa Mon Sep 17 00:00:00 2001 From: Miran Date: Sun, 6 Oct 2024 16:01:43 +0200 Subject: [PATCH 216/216] Reverting changes of work directory. 
--- cleo_plugins/FileSystemOperations/FileSystemOperations.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp index 567b4dd9..6b09d0b1 100644 --- a/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp +++ b/cleo_plugins/FileSystemOperations/FileSystemOperations.cpp @@ -106,6 +106,12 @@ class FileSystemOperations LOG_WARNING(0, "Value (%d) not known by opcode [0A99] in script %s", idx, ScriptInfoStr(thread).c_str()); return OR_CONTINUE; } + + // Hack: restore global workDir if script used some hacky way to set it instead of 0A99 (SkinSelector) + if (idx == 0) + { + FS::current_path(CLEO_GetGameDirectory()); + } } else {